diff -u --recursive --new-file v2.1.98/linux/CREDITS linux/CREDITS --- v2.1.98/linux/CREDITS Thu Apr 23 20:21:27 1998 +++ linux/CREDITS Tue Apr 28 22:41:33 1998 @@ -568,6 +568,7 @@ E: rgooch@atnf.csiro.au D: parent process death signal to children D: prctl() syscall +D: /proc/mtrr support to manipulate MTRRs on Pentium Pro's S: CSIRO Australia Telescope National Facility S: P.O. Box 76, Epping S: N.S.W., 2121 diff -u --recursive --new-file v2.1.98/linux/Documentation/00-INDEX linux/Documentation/00-INDEX --- v2.1.98/linux/Documentation/00-INDEX Tue Mar 10 10:03:29 1998 +++ linux/Documentation/00-INDEX Tue Apr 28 14:22:03 1998 @@ -68,12 +68,12 @@ - list of magic numbers used to mark/protect kernel data structures. mandatory.txt - info on the linux implementation of Sys V mandatory file locking. +mca.txt + - info on supporting Micro Channel Architecture (e.g. PS/2) systems. md.txt - info on boot arguments for the multiple devices driver memory.txt - info on typical Linux memory problems. -mca.txt - - info on supporting Micro Channel Architecture (e.g. PS/2) systems. modules.txt - short guide on how to make kernel parts into loadable modules nbd.txt @@ -84,12 +84,14 @@ - short guide on setting up a diskless box with NFS root filesystem oops-tracing.txt - how to decode those nasty internal kernel error dump messages. -pcwd-watchdog.txt - - info and sample code for using with the PC Watchdog reset card. paride.txt - information about the parallel port IDE subsystem. parport.txt - how to use the parallel-port driver. +pci.txt + - info on the PCI subsystem for device driver authors +pcwd-watchdog.txt + - info and sample code for using with the PC Watchdog reset card. powerpc/ - directory with info on using linux with the PowerPC. ramdisk.txt @@ -104,8 +106,12 @@ - how to set up linux with a serial line console as the default. smart-config.txt - description of the Smart Config makefile feature. 
+smp + - how to set up the kernel for SMP smp.tex - TeX document describing implementation of Multiprocessor Linux +sound/ + - directory with info on sound card support specialix.txt - info on hardware/driver for specialix IO8+ multiport serial card. spinlocks.txt @@ -114,6 +120,10 @@ - info on using the Stallion multiport serial driver. svga.txt - short guide on selecting video modes at boot via VGA BIOS. +sysctl/ + - directory with info on the /proc/sys/* files +sysrq.txt + - info on the magic SysRq key transname.txt - how to use name translation to ease use of diskless systems. unicode.txt diff -u --recursive --new-file v2.1.98/linux/Documentation/ARM-README linux/Documentation/ARM-README --- v2.1.98/linux/Documentation/ARM-README Tue Jan 20 16:39:41 1998 +++ linux/Documentation/ARM-README Tue Apr 28 14:22:03 1998 @@ -89,12 +89,12 @@ to linux@arm.uk.linux.org. Patches will not be included into future kernels unless they come to me (or the relevant person concerned). - When sending bug reports, please ensure that they contain all relevent + When sending bug reports, please ensure that they contain all relevant information, eg. the kernel messages that were printed before/during the problem, what you were doing, etc. - For patches, please include some explaination as to what the patch does - and why (if relevent). + For patches, please include some explanation as to what the patch does + and why (if relevant). Modules ------- @@ -153,7 +153,7 @@ big external 5.25" FH 64MB drive (who could ever want more :-) ). 
I've just got 240K/s off it (a dd with bs=128k); thats about half of what - RiscOS gets; but its a heck of a lot better than the 50K/s I was getting + RiscOS gets; but it's a heck of a lot better than the 50K/s I was getting last week :-) Known bug: Drive data errors can cause a hang; including cases where diff -u --recursive --new-file v2.1.98/linux/Documentation/Changes linux/Documentation/Changes --- v2.1.98/linux/Documentation/Changes Tue Mar 17 22:18:13 1998 +++ linux/Documentation/Changes Tue Apr 28 22:48:57 1998 @@ -19,7 +19,8 @@ texinfo so a diff is useless anyway (though I can incorporate one by hand if you insist upon sending it that way ;-). - Check out http://www.cviog.uga.edu/Misc/info/LinuxBleed.html for an + Check out +http://www.mindspring.com/~nunez/info/linux/LinuxBleed.html for an HTML-ized shopping list. For those of you in Europe, @@ -32,7 +33,7 @@ Also, don't forget http://www.linuxhq.com/ for all your Linux kernel needs. -Last updated: March 16. 1998 +Last updated: April 27, 1998 Current Author: Chris Ricker (kaboom@gatech.edu). Current Minimal Requirements @@ -46,14 +47,14 @@ - Gnu C 2.7.2.3 ; gcc --version - Binutils 2.8.1.0.1 ; ld -v - Linux C Library 5.4.44 ; ls -l /lib/libc.so.* -- Dynamic Linker (ld.so) 1.9.5 ; ldd -v +- Dynamic Linker (ld.so) 1.9.5 ; ldd --version - Linux C++ Library 2.7.2.8 ; ls -l /usr/lib/libg++.so.* - Procps 1.2.5 ; ps --version -- Procinfo 0.13 ; procinfo -v +- Procinfo 13 ; procinfo -v - Mount 2.7l ; mount --version - Net-tools 1.41 ; hostname -V - Loadlin 1.6a -- Sh-utils 1.16 ; expr --v +- Sh-utils 1.16 ; basename --v - Autofs 0.3.11 ; automount --version - NFS 0.4.21 ; showmount --version - Bash 1.14.7 ; bash -version @@ -137,7 +138,11 @@ Note that the latest compilers (egcs, pgcc, gcc 2.8) may do Bad Things while compiling your kernel, particularly if absurd -optimizations (like -O9) are used. Caveat emptor. +optimizations (like -O9) are used. Caveat emptor. 
Currently, the only +C compiler available in a binary distribution is egcs. Version 1.0.2 +seems okay; if you have to have a binary, you may be successful using +that. In general, however, gcc-2.7.2.3 is known to be stable, while +egcs and others have not been as thoroughly tested yet. Networking Changes ================== @@ -163,8 +168,14 @@ To run bootpd, you'll need to issue the following command: echo 1 >/proc/sys/net/ipv4/ip_boot_agent + Similar procedures are necessary to turn on other features. If +something appears broken, check the /proc/sys/net/ipv4/ directory. "1" +generally denotes enabled, while "0" generally denotes disabled. + For support for new features like IPv6, upgrade to the latest -net-tools. +net-tools. This will also fix other problems. For example, the format +of /proc/net/dev changed; as a result, an older ifconfig will +incorrectly report errors. Memory ====== @@ -257,11 +268,11 @@ ======== The 2.8.1.0.1 release: -ftp://tsx-11.mit.edu/pub/linux/packages/GCC/binutils-2.8.1.0.1.bin.tar.gz -ftp://sunsite.unc.edu/pub/Linux/GCC/binutils-2.8.1.0.1.bin.tar.gz +ftp://tsx-11.mit.edu/pub/linux/packages/GCC/binutils-2.8.1.0.23.bin.tar.gz +ftp://sunsite.unc.edu/pub/Linux/GCC/binutils-2.8.1.0.23.bin.tar.gz Installation notes: -ftp://tsx-11.mit.edu/pub/linux/packages/GCC/release.binutils-2.8.1.0.1 -ftp://sunsite.unc.edu/pub/Linux/GCC/release.binutils-2.8.1.0.1 +ftp://tsx-11.mit.edu/pub/linux/packages/GCC/release.binutils-2.8.1.0.23 +ftp://sunsite.unc.edu/pub/Linux/GCC/release.binutils-2.8.1.0.23 Gnu C ===== @@ -273,13 +284,26 @@ ftp://tsx-11.mit.edu/pub/linux/packages/GCC/release.gcc-2.7.2.3 ftp://sunsite.unc.edu/pub/Linux/GCC/release.gcc-2.7.2.3 +The egcs-1.0.2 release: +ftp://tsx-11.mit.edu/pub/linux/packages/GCC/egcs-1.0.2-glibc.x86.tar.gz +ftp://tsx-11.mit.edu/pub/linux/packages/GCC/egcs-1.0.2-libc5.x86.tar.gz +ftp://sunsite.unc.edu/pub/Linux/GCC/egcs-1.0.2-glibc.x86.tar.gz +ftp://sunsite.unc.edu/pub/Linux/GCC/egcs-1.0.2-libc5.x86.tar.gz 
+Installation notes: +ftp://tsx-11.mit.edu/pub/linux/packages/GCC/release.egcs-1.0.2 +ftp://sunsite.unc.edu/pub/Linux/GCC/release.egcs-1.0.2 + +Gnu C 2.7.2.3 source: +ftp://prep.ai.mit.edu/pub/gnu/gcc-2.7.2.3.tar.gz +ftp://sunsite.unc.edu/pub/gnu/gcc-2.7.2.3.tar.gz + Linux C Library =============== The 5.4.44 release: ftp://tsx-11.mit.edu/pub/linux/packages/GCC/libc-5.4.44.bin.tar.gz ftp://sunsite.unc.edu/pub/Linux/GCC/libc-5.4.44.bin.tar.gz -Installation notes for 5.4.38: +Installation notes for 5.4.44: ftp://tsx-11.mit.edu/pub/linux/packages/GCC/release.libc-5.4.44 ftp://sunsite.unc.edu/pub/Linux/GCC/release.libc-5.4.44 @@ -304,21 +328,20 @@ ================= The 2.1.85 release: -ftp://ftp.redhat.com/pub/alphabits/modutils/modutils-2.1.85.tar.gz ftp://ftp.kernel.org/pub/linux/kernel/v2.1/modutils-2.1.85.tar.gz Procps utilities ================ The 1.2 release: -ftp://tsx-11.mit.edu/pub/linux/sources/usr.bin/procps-1.2.5.tar.gz -ftp://sunsite.unc.edu/pub/Linux/system/status/ps/procps-1.2.5.tgz +ftp://tsx-11.mit.edu/pub/linux/sources/usr.bin/procps-1.2.7.tar.gz +ftp://sunsite.unc.edu/pub/Linux/system/status/ps/procps-1.2.7.tgz Procinfo utilities ================== -The 0.11 release: -ftp://ftp.cistron.nl/pub/people/svm/procinfo-0.13.tar.gz +The 13 release: +ftp://ftp.cistron.nl/pub/people/svm/procinfo-13.tar.gz RPM utilities ============= @@ -337,8 +360,7 @@ ====== The 0.66.7 release: -ftp://tsx-11.mit.edu/pub/linux/ALPHA/dosemu/dosemu0.66.7.tgz -ftp://sunsite.unc.edu/pub/Linux/system/emulators/dosemu0.66.7.tgz +ftp://tsx-11.mit.edu/pub/linux/ALPHA/dosemu/dosemu-0.66.7.tgz Loadlin ======= @@ -376,8 +398,9 @@ Net-tools ========= -The 1.432 release: -ftp://ftp.cs-ipv6.lancs.ac.uk/pub/Code/Linux/Net_Tools/net-tools-1.432.tar.gz +The 1.45 release: +ftp://ftp.cs-ipv6.lancs.ac.uk/pub/Code/Linux/Net_Tools/net-tools-1.45.tar.gz +http://www.tazenda.demon.co.uk/phil/net-tools/net-tools-1.45.tar.gz Ypbind ====== @@ -396,7 +419,6 @@ The 2.1.1 release: 
ftp://ftp.gwdg.de/pub/linux/misc/ncpfs/ncpfs-2.1.1.tgz -ftp://sunsite.unc.edu/pub/Linux/system/Filesystems/ncpfs/ncpfs-2.1.1.tgz Pcmcia-cs ========= diff -u --recursive --new-file v2.1.98/linux/Documentation/Configure.help linux/Documentation/Configure.help --- v2.1.98/linux/Documentation/Configure.help Thu Apr 23 20:21:27 1998 +++ linux/Documentation/Configure.help Tue Apr 28 22:41:33 1998 @@ -1031,6 +1031,12 @@ works. If both PCI BIOS and direct PCI access are enabled, the use of BIOS is preferred. If unsure, say Y. +PCI quirks +CONFIG_PCI_QUIRKS + If you have a broken BIOS, it may fail to set up the PCI bus in a + correct or optimal fashion. If your BIOS is fine you can say N here + for a very slightly smaller kernel. If unsure, say Y. + PCI bridge optimization (experimental) CONFIG_PCI_OPTIMIZE This can improve access times for some hardware devices if you have @@ -6435,6 +6441,28 @@ it need not try to run the tape drive at the highest available speed. If unsure, leave this disabled, i.e. leave it at 2000 bits/sec. + +MTRR control and configuration +CONFIG_MTRR + On Intel Pentium Pro systems the Memory Type Range Registers (MTRRs) + may be used to control processor access to memory ranges. This is + most useful when you have a video (VGA) card on the PCI + bus. Enabling write-combining allows PCI write transfers to be + combined into a larger transfer before bursting over the PCI + bus. This can increase performance of image write operations 2.5 + times or more. + This option creates a /proc/mtrr file which may be used to manipulate + your MTRRs. Typically the X server should use this. This should have + a reasonably generic interface so that similar control registers on + other processors can be easily supported. + This option also fixes a problem with buggy SMP BIOSes which only + set the MTRRs for the boot CPU and not the secondary CPUs. This can + lead to all sorts of problems. 
+ In general you should compile this into the kernel, rather than as a + loadable module, because the BIOS fix needs to be done early in the + boot sequence. If you compile this as a module, the BIOS fix will be + delayed until you load the module. You do this at your own risk. + See Documentation/mtrr.txt for more information. Main CPU frequency, only for DEC alpha machine CONFIG_FT_ALPHA_CLOCK diff -u --recursive --new-file v2.1.98/linux/Documentation/IO-APIC.txt linux/Documentation/IO-APIC.txt --- v2.1.98/linux/Documentation/IO-APIC.txt Wed Feb 4 11:35:59 1998 +++ linux/Documentation/IO-APIC.txt Tue Apr 28 14:22:03 1998 @@ -43,7 +43,7 @@ A) if your board is unlisted, then mail to linux-smp to get it into either the white or the blacklist B) if your board is blacklisted, then figure out the apropriate - pirq= option to get your system boot + pirq= option to get your system to boot pirq= lines look like the following in /etc/lilo.conf: @@ -107,9 +107,9 @@ slots.] generally, it's always possible to find out the correct pirq= settings, just -permutate all IRQ numbers properly ... it will take some time though. An +permute all IRQ numbers properly ... it will take some time though. An 'incorrect' pirq line will cause the booting process to hang, or a device -wont function properly (if it's inserted as eg. a module). +won't function properly (if it's inserted as eg. a module). If you have 2 PCI buses, then you can use up to 8 pirq values. 
Although such boards tend to have a good configuration and will be included in the diff -u --recursive --new-file v2.1.98/linux/Documentation/binfmt_misc.txt linux/Documentation/binfmt_misc.txt --- v2.1.98/linux/Documentation/binfmt_misc.txt Fri Jan 23 18:10:31 1998 +++ linux/Documentation/binfmt_misc.txt Tue Apr 28 14:22:03 1998 @@ -1,8 +1,8 @@ Kernel Support for miscellaneous (your favourite) Binary Formats v1.1 ===================================================================== -This Kernel feature allows to invoke almost (for restrictions see below) every -program by simply typing its name in the shell. +This Kernel feature allows you to invoke almost (for restrictions see below) +every program by simply typing its name in the shell. This includes for example compiled Java(TM), Python or Emacs programs. To achieve this you must tell binfmt_misc which interpreter has to be invoked @@ -34,7 +34,7 @@ There are some restrictions: - the whole register string may not exceed 255 characters - - the magic must resist in the first 128 bytes of the file, i.e. + - the magic must reside in the first 128 bytes of the file, i.e. offset+size(magic) has to be less than 128 - the interpreter string may not exceed 127 characters diff -u --recursive --new-file v2.1.98/linux/Documentation/cdrom/aztcd linux/Documentation/cdrom/aztcd --- v2.1.98/linux/Documentation/cdrom/aztcd Tue Dec 2 11:43:16 1997 +++ linux/Documentation/cdrom/aztcd Tue Apr 28 14:22:03 1998 @@ -202,7 +202,7 @@ 5.1 MULTISESSION SUPPORT Multisession support for CD's still is a myth. I implemented and tested a basic support for multisession and XA CDs, but I still have not enough CDs and appli- -cations to test it rigourously. So if you'd like to help me, please contact me +cations to test it rigorously. So if you'd like to help me, please contact me (Email address see below). As of version 1.4 and newer you can enable the multisession support in aztcd.h by setting AZT_MULTISESSION to 1. 
Doing so will cause the ISO9660-filesystem to deal with multisession CDs, ie. redirect @@ -375,7 +375,7 @@ the finite state machine in azt_poll(). The most important are the status messages, look how they are defined and try to understand, if they make sense in the context where they appear. With a CD-ROM inserted the status - should always be 8, except in aztcd_open(). Try to open the tray, insert a + should always be 8, except in aztcd_open(). Try to open the tray, insert an audio disk, insert no disk or reinsert the CD-ROM and check, if the status bits change accordingly. The status bits are the most likely point, where the drive manufacturers may implement changes. @@ -400,7 +400,7 @@ that the ACMD_SOFT_RESET is issued in any case, by substituting the if-statement 'if ( ...=AFL_OP_OK)' by 'if (1)'. -If you succeed, please mail may the exact version string of your drive and +If you succeed, please mail me the exact version string of your drive and the code modifications, you have made together with a short explanation. If you don't succeed, you may mail me the output of the debugging messages. But remember, they are only useful, if they are exact and complete and you @@ -439,13 +439,13 @@ code around function azt_poll() case AZT_S_MODE does work. In my test I have not been able to switch to reading in raw mode. For reading raw mode, Aztech uses a different command than for cooked mode, which I only have implemen- -ted in the ioctl-section but not in the section which is used by the ISO9660- +ted in the ioctl-section but not in the section which is used by the ISO9660. The driver was developed on an AST PC with Intel 486/DX2, 8MB RAM, 340MB IDE hard disk and on an AST PC with Intel Pentium 60MHz, 16MB RAM, 520MB IDE running Linux kernel version 1.0.9 from the LST 1.8 Distribution. The kernel was compiled with gcc.2.5.8. My CD-ROM drive is an Aztech CDA268-01A. My -drive says, that it has Firmware Version AZT26801A1.3. 
It came with a ISA-bus +drive says, that it has Firmware Version AZT26801A1.3. It came with an ISA-bus interface card and works with polled I/O without DMA and without interrupts. The code for all other drives was 'remote' tested and debugged by a number of volunteers on the Internet. @@ -508,7 +508,7 @@ /dev/aztcd0 in order to use it. Remember, that you should not have /dev/cdrom mounted, when you're playing audio CDs. -This program is just a hack for testing the ioctl-functions in aztcd.c, I will +This program is just a hack for testing the ioctl-functions in aztcd.c. I will not maintain it, so if you run into problems, discard it or have a look into the source code 'cdplay.c'. The program does only contain a minimum of user protection and input error detection. If you use the commands in the wrong @@ -517,11 +517,11 @@ error messages when using cdplay, after that, the system might not be stable any more, so you'd better reboot. As the ioctl-functions run in kernel mode, most normal Linux-multitasking protection features do not work. By using -uninitialized 'wild' pointers etc., it is easy to write to other users data and -program areas, destroy kernel tables etc.. So if you experiment with ioctls +uninitialized 'wild' pointers etc., it is easy to write to other users' data +and program areas, destroy kernel tables etc.. So if you experiment with ioctls as always when you are doing systems programming and kernel hacking, you should have a backup copy of your system in a safe place (and you also -should try before, how to restore from a backup copy)! +should try restoring from a backup copy first)! 
A reworked and improved version called 'cdtester.c', which has yet more features for testing CDROM-drives can be found in diff -u --recursive --new-file v2.1.98/linux/Documentation/cdrom/cdrom-standard.tex linux/Documentation/cdrom/cdrom-standard.tex --- v2.1.98/linux/Documentation/cdrom/cdrom-standard.tex Wed Dec 31 11:34:15 1997 +++ linux/Documentation/cdrom/cdrom-standard.tex Tue Apr 28 14:22:04 1998 @@ -45,15 +45,15 @@ \end{itemize} The openness of \linux, and the many different types of available hardware has allowed \linux\ to support many different hardware devices. -Unfortunatly, the very openness that has allowed \linux\ to support +Unfortunately, the very openness that has allowed \linux\ to support all these different devices has also allowed the behavior of each device driver to differ significantly from one device to another. -This divergence of behavior has been the very significant for \cdrom\ +This divergence of behavior has been very significant for \cdrom\ devices; the way a particular drive reacts to a `standard' $ioctl()$ call varies greatly from one device driver to another. To avoid making their drivers totally inconsistent, the writers of \linux\ \cdrom\ drivers generally created new device drivers by understanding, copying, -and then changing an existing one. Unfortunatly, this practice did not +and then changing an existing one. Unfortunately, this practice did not maintain uniform behavior across all the \linux\ \cdrom\ drivers. This document describes an effort to establish Uniform behavior across @@ -85,7 +85,7 @@ set of commands and data formats.\footnote{I cannot recollect what kernel version I looked at, then, presumably 1.2.13 and 1.3.34---the latest kernel that I was indirectly involved in.} It seemed that many -features of the software interface had been added to accomodate the +features of the software interface had been added to accommodate the capabilities of a particular drive, in an {\fo ad hoc\/} manner. 
More importantly, it appeared that the behavior of the `standard' commands was different for most of the different drivers: \eg, some drivers @@ -93,7 +93,7 @@ others do not. Some drivers lock the door upon opening the device, to prevent an incoherent file system, but others don't, to allow software ejection. Undoubtedly, the capabilities of the different drives vary, -but even when two drives have the same capability their driver's +but even when two drives have the same capability their drivers' behavior was usually different. I decided to start a discussion on how to make all the \linux\ \cdrom\ @@ -109,7 +109,7 @@ The goal of the \UCD\ is {\em not\/} to alienate driver developers who have not yet taken steps to support this effort. The goal of \UCD\ is -simply is give people writing application programs for \cdrom\ drives +simply to give people writing application programs for \cdrom\ drives {\em one\/} \linux\ \cdrom\ interface with consistent behavior for all \cdrom\ devices. In addition, this also provides a consistent interface between the low-level device driver code and the \linux\ kernel. Care @@ -147,14 +147,14 @@ from the actual hardware implementation. Note that this effort has made few changes which will effect a user's application programs. The greatest change involved moving the contents of the various low-level -\cdrom\ driver's header files to the kernel's cdrom directory. This was +\cdrom\ drivers' header files to the kernel's cdrom directory. This was done to help ensure that the user is only presented with only one cdrom interface, the interface defined in \cdromh. \cdrom\ drives are specific enough (\ie, different from other block-devices such as floppy or hard disc drives), to define a set of common {\em \cdrom\ device operations}, $_dops$. -These operations are different than the classical block-device file +These operations are different from the classical block-device file operations, $_fops$. 
The routines for the \UCD\ interface level are implemented in the file @@ -267,7 +267,7 @@ Note that most functions have fewer parameters than their $blkdev_fops$ counterparts. This is because very little of the -information in the structures $inode$ and $file$ are used. For most +information in the structures $inode$ and $file$ is used. For most drivers, the main parameter is the $struct$ $cdrom_device_info$, from which the major and minor number can be extracted. (Most low-level \cdrom\ drivers don't even look at the major and minor number though, @@ -291,7 +291,7 @@ \noalign{\medskip} &int& options : 30;& options flags \cr &long& mc_flags : 2;& media-change buffer flags \cr - & int& use_count;& number of times devices is opened\cr + & int& use_count;& number of times device is opened\cr \}\cr }$$ Using this $struct$, a linked list of the registered minor devices is @@ -312,23 +312,23 @@ A few registers contain variables local to the \cdrom\ drive. The flags $options$ are used to specify how the general \cdrom\ routines should behave. These various flags registers should provide enough -flexibility to adapt to the different user's wishes (and {\em not\/} the +flexibility to adapt to the different users' wishes (and {\em not\/} the `arbitrary' wishes of the author of the low-level device driver, as is the case in the old scheme). The register $mc_flags$ is used to buffer the information from $media_changed()$ to two separate queues. Other -data that is specific to minor drive, can be accessed through $handle$, +data that is specific to a minor drive, can be accessed through $handle$, which can point to a data structure specific to the low-level driver. The fields $use_count$, $next$, $options$ and $mc_flags$ need not be initialized. -The intermediate software layer that \cdromc\ forms will performs some +The intermediate software layer that \cdromc\ forms will perform some additional bookkeeping. 
The use count of the device (the number of processes that have the device opened) is registered in $use_count$. The function $cdrom_ioctl()$ will verify the appropriate user-memory regions for read and write, and in case a location on the CD is transferred, it will `sanitize' the format by making requests to the low-level drivers in a standard format, and translating all formats between the -user-software and low level drivers. This relieves much of the drivers +user-software and low level drivers. This relieves much of the drivers' memory checking and format checking and translation. Also, the necessary structures will be declared on the program stack. @@ -469,7 +469,7 @@ sanitization goes even further: the low-level implementation may return the requested information in $CDROM_MSF$ format if it wishes so (setting the $ms_info\rightarrow addr_format$ field appropriately, of -course) and the routines in \cdromc\ will make the transform if +course) and the routines in \cdromc\ will make the transformation if necessary. The return value is 0 upon success. \subsection{$Int\ get_mcn(struct\ cdrom_device_info * cdi, struct\ @@ -498,7 +498,7 @@ Some of the \cdrom-$ioctl$s defined in \cdromh\ can be implemented by the routines described above, and hence the function $cdrom_ioctl$ will use those. However, most $ioctl$s deal with -audio-control. We have decided to leave these accessed through a +audio-control. We have decided to leave these to be accessed through a single function, repeating the arguments $cmd$ and $arg$. Note that the latter is of type $void*{}$, rather than $unsigned\ long\ int$. The routine $cdrom_ioctl()$ does do some useful things, @@ -532,7 +532,7 @@ so either the audio-file-system should ask for 75264 bytes at once (the least common multiple of 512 and 2352), or the drivers should bend their backs to cope with this incoherence (to which I would be -opposed). Furthermore, it it very difficult for the hardware to find +opposed). 
Furthermore, it is very difficult for the hardware to find the exact frame boundaries, since there are no synchronization headers in audio frames. Once these issues are resolved, this code should be standardized in \cdromc. @@ -562,7 +562,7 @@ CDC_SELECT_SPEED& can select speed, in units of $\sim$150\,kB/s\cr CDC_SELECT_DISC& drive is juke-box\cr CDC_MULTI_SESSION& can read sessions $>\rm1$\cr -CDC_MCN& can read Medium Catalog Number\cr +CDC_MCN& can read Media Catalog Number\cr CDC_MEDIA_CHANGED& can report if disc has changed\cr CDC_PLAY_AUDIO& can perform audio-functions (play, pause, etc)\cr CDC_RESET& hard reset device\cr @@ -724,12 +724,12 @@ \begin{description} \item[$CDO_AUTO_CLOSE \mathrel| CDO_USE_FFLAGS \mathrel| CDO_LOCK$] This is the default setting. (With $CDO_CHECK_TYPE$ it will be better, in the -future.) If the device is not yet opened by any other process, and it +future.) If the device is not yet opened by any other process, and if the device is being opened for data ($O_NONBLOCK$ is not set) and the tray is found to be open, an attempt to close the tray is made. Then, it is verified that a disc is in the drive and, if $CDO_CHECK_TYPE$ is set, that it contains tracks of type `data mode 1.' Only if all tests -are passed, the return value is zero. The door is locked to prevent file +are passed is the return value zero. The door is locked to prevent file system corruption. If the drive is opened for audio ($O_NONBLOCK$ is set), no actions are taken and a value of 0 will be returned. \item[$CDO_AUTO_CLOSE \mathrel| CDO_AUTO_EJECT \mathrel| CDO_LOCK$] This @@ -745,7 +745,7 @@ \newsection{Description of routines in \cdromc} Only a few routines in \cdromc\ are exported to the drivers. In this -newsection we will discuss these, as well as the functions that `take +new section we will discuss these, as well as the functions that `take over' the \cdrom\ interface to the kernel. The header file belonging to \cdromc\ is called \cdromh. 
Formerly, some of the contents of this file were placed in the file {\tt {ucdrom.h}}, but this file has now been @@ -833,7 +833,7 @@ \item[CDROMEJECT_SW] If $arg\not=0$, set behavior to auto-close (close tray on first open) and auto-eject (eject on last release), otherwise set behavior to non-moving on $open()$ and $release()$ calls. -\item[CDROM_GET_MCN or CDROM_GET_UPC] Get the Medium Catalog Number from a CD. +\item[CDROM_GET_MCN or CDROM_GET_UPC] Get the Media Catalog Number from a CD. \end{description} \subsubsection{$Ioctl$s routed through $audio_ioctl()$} @@ -878,7 +878,7 @@ \item[CDROM_SELECT_SPEED] Select head-rate speed of disc specified as by $arg$ in units of standard cdrom speed (176\,kB/sec raw data or 150\,kB/sec file system data). The value 0 means `auto-select', \ie, - play audio discs at real time and data disc at maximum speed. The value + play audio discs at real time and data discs at maximum speed. The value $arg$ is checked against the maximum head rate of the drive found in the $cdrom_dops$. \item[CDROM_SELECT_DISC] Select disc numbered $arg$ from a juke-box. @@ -887,18 +887,18 @@ \item[CDROM_MEDIA_CHANGED] Returns 1 if a disc has been changed since the last call. Note that calls to $cdrom_media_changed$ by the VFS are treated by an independent queue, so both mechanisms will detect - a media change once. For Juke-boxes, an extra argument $arg$ + a media change once. For juke-boxes, an extra argument $arg$ specifies the slot for which the information is given. The special value $CDSL_CURRENT$ requests that information about the currently - selected slot is returned. + selected slot be returned. \item[CDROM_DRIVE_STATUS] Returns the status of the drive by a call to $drive_status()$. Return values are defined in section~\ref{drive status}. Note that this call doesn't return information on the current playing activity of the drive; this can be polled through an - $ioctl$ call to $CDROMSUBCHNL$. 
For Juke-boxes, an extra argument + $ioctl$ call to $CDROMSUBCHNL$. For juke-boxes, an extra argument $arg$ specifies the slot for which (possibly limited) information is given. The special value $CDSL_CURRENT$ requests that information - about the currently selected slot is returned. + about the currently selected slot be returned. \item[CDROM_DISC_STATUS] Returns the type of the disc currently in the drive. It should be viewed as a complement to $CDROM_DRIVE_STATUS$. This $ioctl$ can provide \emph {some} information about the current @@ -996,7 +996,7 @@ \item Change the prototypes of $_open()$ and $_release()$, and remove any strategic code (\ie, tray movement, door locking, etc.). -\item Try to recompile the drivers. We advice you to use modules, both +\item Try to recompile the drivers. We advise you to use modules, both for {\tt {cdrom.o}} and your driver, as debugging is much easier this way. \end{enumerate} @@ -1004,7 +1004,7 @@ \newsection{Thanks} Thanks to all the people involved. First, Erik Andersen, who has -taken over the torch in maintaining \cdromc\ and integrating many +taken over the torch in maintaining \cdromc\ and integrating much \cdrom-related code in the 2.1-kernel. Thanks to Scott Snyder and Gerd Knorr, who were the first to implement this interface for SCSI and IDE-CD drivers and added many ideas for extension of the data diff -u --recursive --new-file v2.1.98/linux/Documentation/cdrom/cdu31a linux/Documentation/cdrom/cdu31a --- v2.1.98/linux/Documentation/cdrom/cdu31a Thu Apr 11 23:49:29 1996 +++ linux/Documentation/cdrom/cdu31a Tue Apr 28 14:22:04 1998 @@ -32,7 +32,7 @@ Setting Up the Hardware ----------------------- -The CDU31A driver in unable to safely tell if an interface card is +The CDU31A driver is unable to safely tell if an interface card is present that it can use because the interface card does not announce its presence in any way besides placing 4 I/O locations in memory. 
It used to just probe memory and attempt commands, but Linus wisely asked @@ -44,7 +44,7 @@ soundcard. If you have the Sony CDU31A/CDU33A drive interface card, the following -diagram will help you set it up. If You have another card, you are on +diagram will help you set it up. If you have another card, you are on your own. You need to make sure that the I/O address and interrupt is not used by another card in the system. You will need to know the I/O address and interrupt you have set. Note that use of interrupts is diff -u --recursive --new-file v2.1.98/linux/Documentation/cdrom/cm206 linux/Documentation/cdrom/cm206 --- v2.1.98/linux/Documentation/cdrom/cm206 Tue Dec 2 11:41:44 1997 +++ linux/Documentation/cdrom/cm206 Tue Apr 28 14:22:04 1998 @@ -14,7 +14,7 @@ - Full audio support, that is, both workman, workbone and cdp work now reasonably. Reading TOC still takes some time. xmcd has been reported to run successfully. -- Made auto-probe code a little better, i hope +- Made auto-probe code a little better, I hope Features since version 0.28 --------------------------- @@ -37,8 +37,8 @@ Further, you must decide if you are going to specify the base port address and the interrupt request line of the adapter card cm260 as boot options for (a), module parameters for (b), use automatic - probing of these values, or hard-wire your adaptor cards settings - into the source code. If you don't care, you can choose for + probing of these values, or hard-wire your adaptor card's settings + into the source code. If you don't care, you can choose autoprobing, which is the default. In that case you can move on to the next step. @@ -48,10 +48,10 @@ make config - If you have chosen for option (a), answer yes to CONFIG_CM206 and + If you have chosen option (a), answer yes to CONFIG_CM206 and CONFIG_ISO9660_FS. - If you have chosen for option (b), answer yes to CONFIG_MODVERSIONS + If you have chosen option (b), answer yes to CONFIG_MODVERSIONS and no (!) 
to CONFIG_CM206 and CONFIG_ISO9660_FS. 2) then do a @@ -64,7 +64,7 @@ Using the driver as a module ---------------------------- -If you will only seldomly use the cd-rom driver, you can choose for +If you will only occasionally use the cd-rom driver, you can choose option (b), install as a loadable module. You may have to re-compile the module when you upgrade the kernel to a new version. @@ -84,7 +84,7 @@ insmod /usr/src/linux/modules/cm206.o cm206=0x300,11 -The order of base port and irq line doesn't matter; you may specify only +The order of base port and irq line doesn't matter; if you specify only one, the other will have the value of the compiled-in default. You may also have to install the file-system module `iso9660.o', if you didn't compile that into the kernel. @@ -92,17 +92,17 @@ Using the driver as part of the kernel -------------------------------------- -If you have chosen for option a, you can specify the base-port +If you have chosen option (a), you can specify the base-port address and irq on the lilo boot command line, e.g.: LILO: linux cm206=0x340,11 This assumes that your linux kernel image keyword is `linux'. -If you may specify either IRQ (3--11) or base port (0x300--0x370), +If you specify either IRQ (3--11) or base port (0x300--0x370), auto probing is turned off for both settings, thus setting the other value to the compiled-in default. -Note that you can put these parameters also in the lilo configuration file: +Note that you can also put these parameters in the lilo configuration file: # linux config image = /vmlinuz @@ -122,7 +122,7 @@ Mounting the cdrom ------------------ -1) Make sure that there is the right device installed in /dev. +1) Make sure that the right device is installed in /dev. mknod /dev/cm206cd b 32 0 @@ -159,7 +159,7 @@ DISCLAIMER ---------- I cannot guarantee that this driver works, or that the hardware will -not be harmed, although i consider it most unlikely. +not be harmed, although I consider it most unlikely. 
I hope that you'll find this driver in some way useful. diff -u --recursive --new-file v2.1.98/linux/Documentation/cdrom/gscd linux/Documentation/cdrom/gscd --- v2.1.98/linux/Documentation/cdrom/gscd Tue Dec 2 11:41:44 1997 +++ linux/Documentation/cdrom/gscd Tue Apr 28 14:22:04 1998 @@ -1,7 +1,7 @@ Goldstar R420 CD-Rom device driver README For all kind of other information about the GoldStar R420 CDROM -and this Linux device driver is a WWW-URL Page installed: +and this Linux device driver see the WWW page: http://linux.rz.fh-hannover.de/~raupach @@ -44,12 +44,12 @@ Before you can use the driver, you have to mknod /dev/gscd0 b 16 0 -to create the appropriate device file (once for all times). +to create the appropriate device file (you only need to do this once). If you use modules, you can try to insert the driver. Say: 'insmod /usr/src/linux/modules/gscd.o' or: 'insmod /usr/src/linux/modules/gscd.o gscd=
' -The driver should report his results now. +The driver should report its results. That's it! Mount a disk, i.e. 'mount -rt iso9660 /dev/gscd0 /cdrom' diff -u --recursive --new-file v2.1.98/linux/Documentation/cdrom/ide-cd linux/Documentation/cdrom/ide-cd --- v2.1.98/linux/Documentation/cdrom/ide-cd Tue Mar 17 22:18:13 1998 +++ linux/Documentation/cdrom/ide-cd Tue Apr 28 14:22:04 1998 @@ -22,7 +22,7 @@ - Reading from data tracks, and mounting iso9660 filesystems. - Playing audio tracks. Most of the cdrom player programs floating - around should work; i usually use Workman. + around should work; I usually use Workman. - Multisession support. @@ -148,7 +148,7 @@ cdtester program in Documentation/cdrom/sbpcd. On a few drives, you can read digital audio directly using a program -such as cdda2wav. The only types of drive which i've heard support +such as cdda2wav. The only types of drive which I've heard support this are Sony and Toshiba drives. You will get errors if you try to use this function on a drive which does not support it. @@ -189,7 +189,7 @@ ioctl. The default is 8. TEST - This presently enables an additional ioctl which enables a user-mode + This currently enables an additional ioctl which enables a user-mode program to execute an arbitrary packet command. See the source for details. This should be left off unless you know what you're doing. @@ -271,7 +271,7 @@ and 15 for the secondary (0x1f0) interface.) Also be sure that you don't have some other hardware which might be conflicting with the IRQ you're using. Also check the BIOS setup for your system; - some have the ability to disable individual IRQ levels, and i've + some have the ability to disable individual IRQ levels, and I've had one report of a system which was shipped with IRQ 15 disabled by default. 
@@ -282,7 +282,7 @@ - If you own a Pioneer DR-A24X, you _will_ get nasty error messages on boot such as "irq timeout: status=0x50 { DriveReady SeekComplete }" The Pioneer DR-A24X cdrom drives are fairly popular these days. - Unfortunatly, these drives seem to become very confused when we perform + Unfortunately, these drives seem to become very confused when we perform the standard Linux ATA disk drive probe. If you own one of these drives, you can bypass the ATA probing which confuses these cdrom drives, by adding `append="hdX=noprobe hdX=cdrom"' to your lilo.conf file and runing @@ -377,7 +377,7 @@ /* * cdchange.c [-v] [] * - * This load a cdrom from a specified slot in a changer, and displays + * This loads a cdrom from a specified slot in a changer, and displays * information about the changer status. The drive should be unmounted before * using this program. * diff -u --recursive --new-file v2.1.98/linux/Documentation/cdrom/isp16 linux/Documentation/cdrom/isp16 --- v2.1.98/linux/Documentation/cdrom/isp16 Tue Dec 2 11:41:44 1997 +++ linux/Documentation/cdrom/isp16 Tue Apr 28 14:22:04 1998 @@ -71,7 +71,7 @@ The syntax of the command line does not allow the specification of irq when there's nothing specified for the base address and no specification of dma when there is no specification of irq. -The value 'nosip16' for drive_type, which may be used as the first +The value 'noisp16' for drive_type, which may be used as the first non-integer option value (e.g. 'isp16=noisp16'), makes sure that probing for and subsequent configuration of an ISP16-compatible card is skipped all together. This can be useful to overcome possible conflicts which diff -u --recursive --new-file v2.1.98/linux/Documentation/cdrom/mcdx linux/Documentation/cdrom/mcdx --- v2.1.98/linux/Documentation/cdrom/mcdx Wed Jun 5 00:47:05 1996 +++ linux/Documentation/cdrom/mcdx Tue Apr 28 14:22:04 1998 @@ -1,6 +1,6 @@ This is a first attempt to create an `improved' driver for the Mitsumi drives. 
It is able to "live together" with mcd.c, if you have at least two Mitsumi -drives: each driver can use his own drive. +drives: each driver can use its own drive. To allow this "coexistence" as long as mcdx.c is not a superset of mcd.c, this driver has to use its own device files. We use MAJOR 20 for it. So, diff -u --recursive --new-file v2.1.98/linux/Documentation/cdrom/sbpcd linux/Documentation/cdrom/sbpcd --- v2.1.98/linux/Documentation/cdrom/sbpcd Tue Dec 2 11:41:44 1997 +++ linux/Documentation/cdrom/sbpcd Tue Apr 28 14:22:04 1998 @@ -4,7 +4,7 @@ sbpcd really, really is NOT for ANY IDE/ATAPI drive! Not even if you have an "original" SoundBlaster card with an IDE interface! -So, you better have a look into README.ide if your port address is 0x1F0, +So, you'd better have a look into README.ide if your port address is 0x1F0, 0x170, 0x1E8, 0x168 or similar. I get tons of mails from IDE/ATAPI drive users - I really can't continue any more to answer them all. So, if your drive/interface information sheets @@ -18,7 +18,7 @@ and get lucky. To make it fully clear to you: if you mail me about IDE/ATAPI drive problems, my answer is above, and I simply will discard your mail, hoping to stop the -flood and to find time to lead my 12-years old son towards happy computing. +flood and to find time to lead my 12-year old son towards happy computing. The driver is able to drive the whole family of "traditional" AT-style (that is NOT the new "Enhanced IDE" or "ATAPI" drive standard) Matsushita, @@ -29,13 +29,13 @@ The Longshine LCS-7260 is a double-speed drive which uses the "old" Matsushita command set. It is supported - with help by Serge Robyns. Vertos ("Elitegroup Computer Systems", ECS) has a similar drive - support -has started; come in contact if you have such a "Vertos 100" or "ECS-AT" +has started; get in contact if you have such a "Vertos 100" or "ECS-AT" drive. There exists an "IBM External ISA CD-ROM Drive" which in fact is a CR-563 with a special controller board. 
This drive is supported (the interface is of the "LaserMate" type), and it is possibly the best buy today (cheaper than -an internal drive, and you can use it as an internal, too - f.e. plug it into +an internal drive, and you can use it as an internal, too - e.g. plug it into a soundcard). CreativeLabs has a new drive "CD200" and a similar drive "CD200F". The latter @@ -51,7 +51,7 @@ to 64 (it is not recommended to do that for normal "file access" usage, but it can speed up things a lot if you use something like "dd" to read from the drive; I use it for verifying self-written CDs this way). -The drive itself is able to deliver 600 kB/sec, so this has to get a point of +The drive itself is able to deliver 600 kB/sec, so this needs work; with the normal setup, the performance currently is not even as good as double-speed. @@ -63,7 +63,7 @@ a complete idiot needs to understand your hassle already with your first mail. And if you want to say "as I have mailed you before", be sure that I don't remember your "case" by such remarks; at the moment, I have some -hundreds open correspondences about Linux CDROM questions (hope to reduce if +hundreds of open correspondences about Linux CDROM questions (hope to reduce if the IDE/ATAPI user questions disappear). @@ -79,7 +79,7 @@ If you have a sound card which needs a "configuration driver" instead of jumpers for interface types and addresses (like Mozart cards) - those drivers get invoked before the DOS CDROM driver in your CONFIG.SYS, typical -names are "cdsetup.sys" and "mztinit.sys" -, let the sound driver do the +names are "cdsetup.sys" and "mztinit.sys" - let the sound driver do the CDROM port configuration (the leading comments in linux/drivers/sound/mad16.c are just for you!). Hannu Savolainen's mad16.c code is able to set up my Mozart card - I simply had to add @@ -184,10 +184,10 @@ 1. Setup your hardware parameters. Though the driver does "auto-probing" at a lot of (not all possible!) 
addresses, this step is recommended for - every-day use. You should let sbpcd auto-probe once and use the reported + everyday use. You should let sbpcd auto-probe once and use the reported address if a drive got found. The reported type may be incorrect; it is correct if you can mount a data CD. There is no choice for you with the - type; only one is the right, the others are deadly wrong. + type; only one is right, the others are deadly wrong. a. Go into /usr/src/linux/drivers/cdrom/sbpcd.h and configure it for your hardware (near the beginning): @@ -229,7 +229,7 @@ second, third, or fourth controller installed, do not say "y" to the secondary Matsushita CD-ROM questions. -3. Then do a "make dep", then make the kernel image ("make zlilo" or else). +3. Then do a "make dep", then make the kernel image ("make zlilo" or similar). 4. Make the device file(s). This step usually already has been done by the MAKEDEV script. @@ -242,7 +242,7 @@ mknod /dev/sbpcd3 b 25 3 to make the node(s). - The "first found" drive gets MINOR 0 (regardless to its jumpered ID), the + The "first found" drive gets MINOR 0 (regardless of its jumpered ID), the "next found" (at the same cable) gets MINOR 1, ... For a second interface board, you have to make nodes like @@ -297,21 +297,21 @@ To reduce or increase the amount of kernel messages, edit sbpcd.c and play with the "DBG_xxx" switches (initialization of the variable "sbpcd_debug"). -Don't forget to reflect what you do; enabling all DBG_xxx switches at once +Don't forget to reflect on what you do; enabling all DBG_xxx switches at once may crash your system, and each message line is accompanied by a delay. The driver uses the "variable BLOCK_SIZE" feature. To use it, you have to specify "block=2048" as a mount option. Doing this will disable the direct execution of a binary from the CD; you have to copy it to a device with the -standard BLOCK_SIZE (1024) before. So, do not use this if your system is +standard BLOCK_SIZE (1024) first. 
So, do not use this if your system is directly "running from the CDROM" (like some of YGGDRASIL's installation variants). There are CDs on the market (like the german "unifix" Linux distribution) which MUST get handled with a block_size of 1024. Generally, one can say all the CDs which hold files of the name YMTRANS.TBL are defective; do not use block=2048 with those. -Within sbpcd.h, you will find some "#define"s (f.e. EJECT and JUKEBOX). With -that, you can configure the driver for some special things. +Within sbpcd.h, you will find some "#define"s (e.g. EJECT and JUKEBOX). With +these, you can configure the driver for some special things. You can use the appended program "cdtester" to set the auto-eject feature during runtime. Jeff Tranter's "eject" utility can do this, too (and more) for you. @@ -344,7 +344,7 @@ command line" feature and specify address & type at boot time to find out the right setup. -For every-day use, address and type should get configured within sbpcd.h. That +For everyday use, address and type should get configured within sbpcd.h. That will stop the auto-probing due to success with the first try. The kernel command "sbpcd=0" suppresses each auto-probing and causes @@ -373,7 +373,7 @@ interfaces, i.e. need SBPRO 0! With "original" SB Pro cards, an initial setting of CD_volume through the -sound cards MIXER register gets done. +sound card's MIXER register gets done. If you are using a "compatible" sound card of types "LaserMate" or "SPEA", you can set SOUND_BASE (in sbpcd.h) to get it done with your card, too... @@ -385,8 +385,8 @@ README.aztcd from the Aztech driver package) should work. The program CDplayer likes to talk to "/dev/mcd" only, xcdplayer wants -"/dev/rsr0", workman loves "/dev/sr0" or "/dev/cdrom" - so, do the appropriate -links for using them without the need of supplying parameters. +"/dev/rsr0", workman loves "/dev/sr0" or "/dev/cdrom" - so, make the +appropriate links to use them without the need to supply parameters. 
Copying audio tracks: diff -u --recursive --new-file v2.1.98/linux/Documentation/cdrom/sjcd linux/Documentation/cdrom/sjcd --- v2.1.98/linux/Documentation/cdrom/sjcd Tue Dec 2 11:41:44 1997 +++ linux/Documentation/cdrom/sjcd Tue Apr 28 14:22:04 1998 @@ -1,10 +1,10 @@ -- Documentation/cdrom/sjcd 80% of the work takes 20% of the time, 20% of the work takes 80% of the time... - (Murphy law) + (Murphy's law) Once started, training can not be stopped... - (StarWars) + (Star Wars) This is the README for the sjcd cdrom driver, version 1.6. @@ -13,7 +13,7 @@ For info on configuring the ISP16 sound card look at Documentation/cdrom/isp16. The driver should work with any of the Panasonic, Sony or Mitsumi style -CDROM interface. +CDROM interfaces. The cdrom interface on Media Magic's soft configurable sound card ISP16, which used to be included in the driver, is now supported in a separate module. This initialisation module will probably also work with other interfaces diff -u --recursive --new-file v2.1.98/linux/Documentation/cdrom/sonycd535 linux/Documentation/cdrom/sonycd535 --- v2.1.98/linux/Documentation/cdrom/sonycd535 Tue Dec 2 11:41:44 1997 +++ linux/Documentation/cdrom/sonycd535 Tue Apr 28 14:22:04 1998 @@ -35,7 +35,7 @@ - Drive must be set up as unit 1. Only the first unit will be recognized - - you must enter your interface address into + - You must enter your interface address into /usr/src/linux/drivers/cdrom/sonycd535.h and build the appropriate kernel or use the "kernel command line" parameter sonycd535=0x320 diff -u --recursive --new-file v2.1.98/linux/Documentation/devices.tex linux/Documentation/devices.tex --- v2.1.98/linux/Documentation/devices.tex Mon Feb 23 18:12:01 1998 +++ linux/Documentation/devices.tex Tue Apr 28 14:22:04 1998 @@ -1148,11 +1148,11 @@ \end{devicelist} \noindent -Network Block Device is somehow similar to loopback devices: If you -read from it, it sends packet accross network asking server for -data. 
If you write to it, it sends packet telling server to write. It -could be used to mounting filesystems over the net, swapping over the -net, implementing block device in userland etc. +Network Block Device is somewhat similar to the loopback device: if you +read from it, it sends packets across the network asking a server for +data. If you write to it, it sends packets telling the server to write. It +could be used for mounting filesystems over the net, swapping over the +net, implementing block devices in userland etc. \begin{devicelist} \major{44}{}{char }{isdn4linux virtual modem -- alternate devices} @@ -1283,7 +1283,7 @@ \end{devicelist} \noindent -This device is used for the interfacing to the MC683xx +This device is used for interfacing to the MC683xx microcontrollers via Background Debug Mode by use of a Parallel Port interface. PD is the Motorola Public Domain Interface and ICD is the commercial interface by P\&E. @@ -1832,7 +1832,7 @@ Serial ports are RS-232 serial ports and any device which simulates one, either in hardware (such as internal modems) or in software (such -as the ISDN driver.) Under Linux, each serial ports has two device +as the ISDN driver.) Under Linux, each serial port has two device names, the primary or callin device and the alternate or callout one. Each kind of device is indicated by a different letter. 
For any letter $X$, the names of the devices are {\file /dev/tty${X\#}$} and diff -u --recursive --new-file v2.1.98/linux/Documentation/digiboard.txt linux/Documentation/digiboard.txt --- v2.1.98/linux/Documentation/digiboard.txt Fri Jan 30 15:50:56 1998 +++ linux/Documentation/digiboard.txt Tue Apr 28 14:22:04 1998 @@ -5,18 +5,18 @@ DigiBoard PC/Xi, PC/Xe, PC/Xeve(which is the newer, smaller Xe with a 8K window which is also known as PC/Xe(8K) and has no memory/irq - switches) You can use up to 4 cards with this driver and should work + switches) You can use up to 4 cards with this driver and it should work on other architectures than intel also. -In case you have problems with this version(1.6.1) of this driver, please +In case you have problems with this version (1.6.1) of this driver, please email directly to me as I made the last update. It you have a report about runnning it on other architectures than intel, email me, so I can document it here. -An version of this driver has been taken by Digiboard to make a driver +A version of this driver has been taken by Digiboard to make a driver software package which supports also PC/Xem cards and newer PCI cards -but it don't support the old PC/Xi cards and it isn't yet ported to -linux-2.1.x and may not be useable on other architectures than intel now. +but it doesn't support the old PC/Xi cards and it isn't yet ported to +linux-2.1.x and may not be usable on other architectures than intel now. It is available from ftp.digi.com/ftp.digiboard.com. You can write me if you need an patch for this driver. @@ -25,7 +25,7 @@ Configuring the Driver ---------------------- -The driver can be build direct into the kernel or as module. +The driver can be built direct into the kernel or as a module. 
The pcxx driver can be configured using the command line feature while loading the kernel with LILO or LOADLIN or, if built as a module, with arguments to insmod and modprobe or with parameters in @@ -66,14 +66,14 @@ membase - Memory start address of that card. memsize - Memory size of that card, in kilobytes. If given, this value is compared against the card to verify configuration and - hinder the driver to use a misconfigured card. If the parameter + hinder the driver from using a misconfigured card. If the parameter does not match the board it is disabled with a memory size error. numports - Number of ports on this card. This is the number of devices to assign to this card or reserve if disabled. altpin - 1: swap DCD and DSR for 8-pin RJ-45 with modems. 0: don't swap DCD and DSR. other values count as 1. -verbose - 1: give nice verbose output during initialisation of the driver. +verbose - 1: give nice verbose output during initialisation of the driver, possibly helpful during board configuration. 0: normal terse output. @@ -82,19 +82,19 @@ io=0x200 membase=0xD0000 numports=16 altpin=0 -Only parameters applicable need be specified. For example to configure +Only applicable parameters need be specified. For example to configure 2 boards, first one at 0x200 with 8 ports, rest defaults, second one at 0x120, memory at 0xD80000, altpin enabled, rest defaults, you can do this by using these parameters: modprobe pcxx io=0x200,0x120 numports=8,8 membase=,0xD80000 altpin=,1 -To disable a temporary unuseable board without changing the mapping of the +To disable a temporary unusable board without changing the mapping of the devices following that board, you can empty the io-value for that board: modprobe pcxx io=,0x120 numports=8,8 membase=,0xD80000 altpin=,1 -The remainig board still uses ttyD8-ttyD15 and cud8-cud15. +The remaining board still uses ttyD8-ttyD15 and cud8-cud15. 
Example line for /etc/conf.modules for use with kerneld and as default parameters for modprobe: @@ -120,7 +120,7 @@ Card type: PC/Xi - the old ones with 64/128/256/512K RAM. PC/Xe - PC/Xe(old ones with 64k mem range). - PC/Xeve - PC/Xe(newers with 8k mem range). + PC/Xeve - PC/Xe(new ones with 8k mem range). Note: This is for documentation only, the type is detected from the board. @@ -146,7 +146,8 @@ board 1: io=0x200, membase=0xd0000, altpin=off and numports=16 are used. If you have the resources (io&mem) free for use, configure your board to -these settings and you should be set up fine even if yours has not 16 ports. +these settings and you should be set up fine even if yours has not got 16 +ports. Sources of Information @@ -274,7 +275,7 @@ append="digi=E,PC/Xi,D,16,200,D0000" append="digi=1,0,0,16,512,(whatever D0000 is in base 10 :) -Driver's minor device numbers are conserved. This means that instead of +Drivers' minor device numbers are conserved. This means that instead of each board getting a block of 16 minors pre-assigned, it gets however many it should, with the next card following directly behind it. A system with 4 2-port PC/Xi boards will use minor numbers 0-7. diff -u --recursive --new-file v2.1.98/linux/Documentation/exception.txt linux/Documentation/exception.txt --- v2.1.98/linux/Documentation/exception.txt Mon Nov 11 01:07:43 1996 +++ linux/Documentation/exception.txt Tue Apr 28 14:22:04 1998 @@ -9,7 +9,7 @@ int verify_area(int type, const void * addr, unsigned long size) function. -This function verified, that the memory area starting at address +This function verified that the memory area starting at address addr and of size size was accessible for the operation specified in type (read or write). To do this, verify_read had to look up the virtual memory area (vma) that contained the address addr. In the @@ -53,7 +53,7 @@ Since we jump to the the contents of fixup, fixup obviously points to executable code. 
This code is hidden inside the user access macros. I have picked the get_user macro defined in include/asm/uacess.h as an -example. The definition is somewhat hard to follow, so lets peek at +example. The definition is somewhat hard to follow, so let's peek at the code generated by the preprocessor and the compiler. I selected the get_user call in drivers/char/console.c for a detailed examination. @@ -122,7 +122,7 @@ } ); -WOW! Black GCC/assembly magic. This is impossible to follow, so lets +WOW! Black GCC/assembly magic. This is impossible to follow, so let's see what code gcc generates: > xorl %edx,%edx @@ -266,7 +266,7 @@ 3.) CPU calls do_page_fault 4.) do page fault calls search_exception_table (regs->eip == c017e7a5); 5.) search_exception_table looks up the address c017e7a5 in the - exception table (i.e. the contents of the ELF section __ex_table + exception table (i.e. the contents of the ELF section __ex_table) and returns the address of the associated fault handle code c0199ff5. 6.) do_page_fault modifies its own return address to point to the fault handle code and returns. @@ -278,7 +278,7 @@ The steps 8a to 8c in a certain way emulate the faulting instruction. -That's it, mostely. If you look at our example, you might ask, why +That's it, mostly. If you look at our example, you might ask why we set EAX to -EFAULT in the exception handler code. Well, the get_user macro actually returns a value: 0, if the user access was successful, -EFAULT on failure. Our original code did not test this diff -u --recursive --new-file v2.1.98/linux/Documentation/filesystems/00-INDEX linux/Documentation/filesystems/00-INDEX --- v2.1.98/linux/Documentation/filesystems/00-INDEX Thu Oct 23 14:00:14 1997 +++ linux/Documentation/filesystems/00-INDEX Tue Apr 28 14:22:04 1998 @@ -2,12 +2,19 @@ - this file (info on some of the filesystems supported by linux). affs.txt - info and mount options for the Amiga Fast File System. +coda.txt + - description of the CODA filesystem. 
+fat_cvf.txt + - Description of the Compressed Volume Files extension to the FAT + filesystem hpfs.txt - info and mount options for the OS/2 HPFS. isofs.txt - info and mount options for the ISO9660 (CDROM) filesystem. ncpfs.txt - info on Novell Netware(tm) filesystem using NCP protocol. +ntfs.txt + - info and mount options for the NTFS filesystem (Win NT). romfs.txt - Description of the ROMFS filesystem. smbfs.txt @@ -18,3 +25,5 @@ - info on the umsdos extensions to the msdos filesystem. vfat.txt - info on using the VFAT filesystem used in Win NT and Win 95 +vfs.txt + - Overview of the Virtual File System diff -u --recursive --new-file v2.1.98/linux/Documentation/filesystems/affs.txt linux/Documentation/filesystems/affs.txt --- v2.1.98/linux/Documentation/filesystems/affs.txt Tue Mar 10 10:03:30 1998 +++ linux/Documentation/filesystems/affs.txt Tue Apr 28 14:22:04 1998 @@ -120,8 +120,8 @@ Although the Amiga and Linux file systems resemble each other, there are some, not always subtle, differences. One of them becomes apparent with symbolic links. While Linux has a file system with exactly one -root directory, the Amiga has a seperate root directory for each -file system (i. e. partition, floppy disk, ...). With the Amiga, +root directory, the Amiga has a separate root directory for each +file system (e.g. partition, floppy disk, ...). With the Amiga, these entities are called "volumes". They have symbolic names which can be used to access them. Thus, symbolic links can point to a different volume. AFFS turns the volume name into a directory name @@ -156,7 +156,7 @@ Filenames are truncated to 30 characters without warning (this can be changed by setting the compile-time option AFFS_NO_TRUNCATE -ina include/linux/amigaffs.h). +in include/linux/amigaffs.h). Case is ignored by the affs in filename matching, but Linux shells do care about the case. 
Example (with /mnt being an affs mounted fs): diff -u --recursive --new-file v2.1.98/linux/Documentation/filesystems/coda.txt linux/Documentation/filesystems/coda.txt --- v2.1.98/linux/Documentation/filesystems/coda.txt Tue Mar 10 10:03:30 1998 +++ linux/Documentation/filesystems/coda.txt Tue Apr 28 14:22:04 1998 @@ -28,7 +28,7 @@ This document describes the communication between Venus and kernel level file system code needed for the operation of the Coda filesys- - tem. This version document is meant to describe the current interface + tem. This document version is meant to describe the current interface (version 1.0) as well as improvements we envisage. ______________________________________________________________________ @@ -161,7 +161,7 @@ client cache and makes remote procedure calls to Coda file servers and related servers (such as authentication servers) to service these requests it receives from the operating system. When Venus has - serviced a request it replies to the operating system with appropiate + serviced a request it replies to the operating system with appropriate return codes, and other data related to the request. Optionally the kernel support for Coda may maintain a minicache of recently processed requests to limit the number of interactions with Venus. Venus @@ -218,10 +218,10 @@ as applicable in the operating system. These differ very significantly among operating systems, but share features such as facilities to read/write and create and remove objects. The Coda FS layer services - such VFS requests in by invoking on or more well defined services + such VFS requests by invoking one or more well defined services offered by the cache manager Venus. When the replies from Venus have come back to the FS driver, servicing of the VFS call continues and - finishes with a reply to the kernels VFS. Finally the VFS layer + finishes with a reply to the kernel's VFS. Finally the VFS layer returns to the process. 
As a result of this design a basic interface exposed by the FS driver @@ -277,7 +277,7 @@ FS Driver in kernel memory on behalf of P and copied to user memory in Venus. - The FS Driver while servicing P makes upcall's to Venus. Such an + The FS Driver while servicing P makes upcalls to Venus. Such an upcall is dispatched to Venus by creating a message structure. The structure contains the identification of P, the message sequence number, the size of the request and a pointer to the data in kernel @@ -289,7 +289,7 @@ synchronization objects. In the upcall routine the message structure is filled in, flags are set to 0, and it is placed on the _p_e_n_d_i_n_g queue. The routine calling upcall is responsible for allocating the - data buffer; it's structure will be described in the next section. + data buffer; its structure will be described in the next section. A facility must exist to notify Venus that the message has been created, and implemented using available synchronization objects in @@ -323,15 +323,15 @@ +o The message is a _d_o_w_n_c_a_l_l. A downcall is a request from Venus to the FS Driver. The FS driver processes the request immediately - (usually a cache eviction or replacement) and when finishes + (usually a cache eviction or replacement) and when it finishes sendmsg_to_kernel returns. Now P awakes and continues processing upcall. There are some - subtleties to take account off. First P will determine if it was woken + subtleties to take account of. First P will determine if it was woken up in upcall by a signal from some other source (for example an attempt to terminate P) or as is normally the case by Venus in its sendmsg_to_kernel call. In the normal case, the upcall routine will - deallocate message structure and return. The FS routine can proceed + deallocate the message structure and return. The FS routine can proceed with its processing. @@ -344,7 +344,7 @@ In case P is woken up by a signal and not by Venus, it will first look at the flags field. 
If the message is not yet READ, the process P can - handle it's signal without notifying Venus. If Venus has READ, and + handle its signal without notifying Venus. If Venus has READ, and the request should not be processed, P can send Venus a signal message to indicate that it should disregard the previous message. Such signals are put in the queue at the head, and read first by Venus. If @@ -407,7 +407,7 @@ Before going on let us elucidate the role of the various fields. The inputArgs start with the opcode which defines the type of service requested from Venus. There are approximately 30 upcalls at present - which we will discuss. The unique field labels the inputArg with + which we will discuss. The unique field labels the inputArg with a unique number which will identify the message uniquely. A process and process group id are passed. Finally the credentials of the caller are included. @@ -421,9 +421,9 @@ 44..11.. DDaattaa ssttrruuccttuurreess sshhaarreedd bbyy tthhee kkeerrnneell aanndd VVeennuuss - The CodaCred structure defines a variety of user and group id's as + The CodaCred structure defines a variety of user and group ids as they are set for the calling process. The vuid_t and guid_t are 32 bit - unsigned integers. It also defines group member ship in an array. On + unsigned integers. It also defines group membership in an array. On Unix the CodaCred has proven sufficient to implement good security semantics for Coda but the structure may have to undergo modification for the Windows environment when these mature. @@ -462,7 +462,7 @@ to be prefixed to identify the Coda cell; this will probably take the form of a Ipv6 size IP address naming the Coda cell through DNS. - The next important structure shared between Venus and the kernel are + The next important structure shared between Venus and the kernel is the attributes of the file. The following structure is used to exchange information. 
It has room for future extensions such as support for device files (currently not present in Coda). @@ -514,7 +514,7 @@ Coda specific requests can be made by application through the pioctl interface. The pioctl is implemented as an ordinary ioctl on a - ficticious file /coda/.CONTROL. The piocl call opens this file, gets + fictitious file /coda/.CONTROL. The pioctl call opens this file, gets a file handle and makes the ioctl call. Finally it closes the file. The kernel involvement in this is limited to providing the facility to @@ -614,7 +614,7 @@ The name of the object is an 8 bit character string of maximum length CFS_MAXNAMLEN, currently set to 256 (including a 0 terminator.) - It is extremely important to realize that Venus bitwise or's the field + It is extremely important to realize that Venus bitwise ors the field cfs_lookup.vtype with CFS_NOCACHE to indicate that the object should not be put in the kernel name cache. @@ -650,11 +650,11 @@ DDeessccrriippttiioonn This call returns the attributes of the file identified by fid. - EErrrroorrss Errors can occur if the object with fid does not exist, are + EErrrroorrss Errors can occur if the object with fid does not exist, is unaccessible or if the caller does not have permission to fetch attributes. - NNoottee Many kernel FS drivers (Linux, NT and Windows 95 need to acquire + NNoottee Many kernel FS drivers (Linux, NT and Windows 95) need to acquire the attributes as well as the Fid for the instantiation of an internal "inode" or "FileHandle". A significant improvement in performance on such systems could be made by combining the _l_o_o_k_u_p and _g_e_t_a_t_t_r calls @@ -689,7 +689,7 @@ in BSD style. Attributes not to be changed are set to -1, apart from vtype which is set to VNON. Other are set to the value to be assigned. The only attributes which the FS driver may request to change are the - mode, ownner, groupid, atime, mtime and ctime. The return value + mode, owner, groupid, atime, mtime and ctime.
The return value indicates success or failure. EErrrroorrss A variety of errors can occur. The object may not exist, may @@ -719,7 +719,7 @@ DDeessccrriippttiioonn Verify if access to the object identified by VFid for operations described by flags is permitted. The result indicates if access will be granted. It is important to remember that Coda uses - ACL's to enforce protection and that ultimately the servers, not the + ACLs to enforce protection and that ultimately the servers, not the clients enforce the security of the system. The result of this call will depend on wether a _t_o_k_e_n is held by the user. @@ -851,7 +851,7 @@ DDeessccrriippttiioonn This call creates a link to the sourceFid in the directory identified by destFid with name tname. The source must reside in the - targets parent, i.e. the source must be have parent destFid, i.e. Coda + target's parent, i.e. the source must have parent destFid, i.e. Coda does not support cross directory hard links. Only the return value is relevant. It indicates success or the type of failure. @@ -1015,7 +1015,7 @@ EErrrroorrss NNOOTTEE Currently the cfs_open_out structure is not properly adapted to - deal with the windows case. It might be best to implement two + deal with the Windows case. It might be best to implement two upcalls, one to open aiming at a container file name, the other at a container file inode. @@ -1051,7 +1051,7 @@ fetching the data in Venus vproc_vfscalls. This seems silly. If a file is being closed, the data in the container file is to be the new data. Here again the execp flag might be in play to create confusion: - presently Venus might think a file can be flushed from the cache when + currently Venus might think a file can be flushed from the cache when it is still memory mapped. This needs to be understood. 0wpage @@ -1059,7 +1059,7 @@ 44..1177.. iiooccttll - SSuummmmaarryy Do an ioctl on a file. This includes the piocl interface. + SSuummmmaarryy Do an ioctl on a file.
This includes the pioctl interface. AArrgguummeennttss @@ -1091,7 +1091,7 @@ EErrrroorrss NNOOTTEE Another bogus parameter. flags is not used. What is the - business about PREFETCHING in the Venus' code? + business about PREFETCHING in the Venus code? 0wpage @@ -1154,8 +1154,8 @@ DDeessccrriippttiioonn Read directory entries from VFid starting at offset and - read at most count bytes. Returns the data into data and indicates - the size returned size. + read at most count bytes. Returns the data in data and returns + the size in size. EErrrroorrss @@ -1196,7 +1196,7 @@ NNOOTTEE This operation is not used. However, it is extremely useful since it can be used to deal with read/write memory mapped files. - These can be "pinned" in the Venus cache using vget and release with + These can be "pinned" in the Venus cache using vget and released with inactive. 0wpage @@ -1219,8 +1219,8 @@ oouutt none - DDeessccrriippttiioonn Ask Venus to update RVM attributes of object VFid. This - should be called as part of kernel level fsync type calls. The + DDeessccrriippttiioonn Ask Venus to update RVM attributes of object VFid. This + should be called as part of kernel level fsync type calls. The result indicates if the synching was successful. EErrrroorrss @@ -1452,7 +1452,7 @@ 4. the cnode of the object The lookup call in the Coda FS Driver may request the cnode of the - desired object from the cache, by passing it's name, directory and the + desired object from the cache, by passing its name, directory and the CodaCred's of the caller. The cache will return the cnode or indicate that it cannot be found. The Coda FS Driver must be careful to invalidate cache entries when it modifies or removes objects. @@ -1496,7 +1496,7 @@ DDeessccrriippttiioonn Remove all entries in the cache carrying the Cred. This - call is issued when tokes for a user expire or are flushed. + call is issued when tokens for a user expire or are flushed. 55..44.. 
ZZAAPPFFIILLEE @@ -1567,7 +1567,7 @@ DDeessccrriippttiioonn Flush the attribute for the file. If it is a dir (odd - vnode), purge its children from the namecache remove the file from the + vnode), purge its children from the namecache and remove the file from the namecache. @@ -1589,7 +1589,7 @@ DDeessccrriippttiioonn This routine replaces a ViceFid in the name cache with another. It is added to allow Venus during reintegration to replace locally allocated temp fids while disconnected with global fids even - when the reference count on those fids are not zero. + when the reference counts on those fids are not zero. 0wpage @@ -1629,7 +1629,7 @@ 66..11.. RReeqquuiirreemmeennttss - The following requirements should be accomodated: + The following requirements should be accommodated: 1. The message queueus should have open and close routines. On Unix the opening of the character devices are such routines. @@ -1659,7 +1659,7 @@ 6. All memory held by cnodes can be freed without relying on upcalls. - 7. Unmounting the file system can be done without relying on upcalss. + 7. Unmounting the file system can be done without relying on upcalls. 8. Mounting the Coda filesystem should fail gracefully if Venus cannot get the rootfid or the attributes of the rootfid. The latter is diff -u --recursive --new-file v2.1.98/linux/Documentation/filesystems/fat_cvf.txt linux/Documentation/filesystems/fat_cvf.txt --- v2.1.98/linux/Documentation/filesystems/fat_cvf.txt Thu Jan 8 14:02:41 1998 +++ linux/Documentation/filesystems/fat_cvf.txt Tue Apr 28 14:22:04 1998 @@ -34,7 +34,7 @@ - BMAP problems - CVF filesystems cannot do bmap. It's impossible by principle. Thus + CVF filesystems cannot do bmap. It's impossible in principle. Thus all actions that require bmap do not work (swapping, writable mmapping). 
Read-only mmapping works because the FAT driver has a hack for this situation :) Well, with some tricks writable mmapping could work, @@ -66,7 +66,7 @@ cvf_format=xxx Forces the driver to use the CVF module "xxx" instead of auto-detection. - This is only necessary if the CVF format is not recognized corrrectly + This is only necessary if the CVF format is not recognized correctly because of bugs or incompatibilities in the CVF modules. (It skips the detect_cvf call.) "xxx" may be the text "none" (without the quotes) to inhibit using any of the loaded CVF modules, just in case a CVF @@ -80,7 +80,7 @@ misinterpretation by the FAT driver, which would recognize the text after a comma as a FAT driver option and might get confused or print strange error messages. The documentation for the CVF module should - offer a different seperation symbol, for example the dot ".", which + offer a different separation symbol, for example the dot ".", which is only valid inside the string "yyy". @@ -109,11 +109,11 @@ It contains... - cvf_version: - A version id which must be uniqe. Choose one. + A version id which must be unique. Choose one. - cvf_version_text: A human readable version string that should be one short word describing the CVF format the module implements. This text is used - for the cvf_format option. This name must also be uniqe. + for the cvf_format option. This name must also be unique. - flags: Bit coded flags, currently only used for a readpage/mmap hack that provides both mmap and readpage functionality. If CVF_USE_READPAGE @@ -178,7 +178,7 @@ This is usually called in cleanup_module. Return value =0 means success. An error only occurs if you try to unregister a CVF format that has not been previously registered. The code uses the version id - to distinguish the modules, so be sure to keep it uniqe. + to distinguish the modules, so be sure to keep it unique. 5. 
CVS Modules ------------------------------------------------------------------------------ diff -u --recursive --new-file v2.1.98/linux/Documentation/filesystems/ntfs.txt linux/Documentation/filesystems/ntfs.txt --- v2.1.98/linux/Documentation/filesystems/ntfs.txt Fri Dec 19 15:24:20 1997 +++ linux/Documentation/filesystems/ntfs.txt Tue Apr 28 14:22:04 1998 @@ -5,9 +5,9 @@ currently works only in read-only mode, with no fault-tolerance supported. If you enable the experimental write support, make sure you can recover from a complete loss of data. For ftdisk support, -limit success was reported with volume sets on top of the md driver, +limited success was reported with volume sets on top of the md driver, although mirror and stripe sets should work as well - if the md -driver can be talked into using the same lay-out as Windows NT. +driver can be talked into using the same layout as Windows NT. The ntfs driver supports the following mount options: iocharset=name Character set to use when returning file names. diff -u --recursive --new-file v2.1.98/linux/Documentation/filesystems/romfs.txt linux/Documentation/filesystems/romfs.txt --- v2.1.98/linux/Documentation/filesystems/romfs.txt Wed Jul 23 10:38:24 1997 +++ linux/Documentation/filesystems/romfs.txt Tue Apr 28 14:22:04 1998 @@ -22,7 +22,7 @@ its mirrors, in the /pub/Linux/system/recovery/ directory. As the name suggests, romfs could be also used (space-efficiently) on -various read-only medias, like (E)EPROM disks if someone will have the +various read-only media, like (E)EPROM disks if someone will have the motivation.. :) However, the main purpose of romfs is to have a very small kernel, @@ -79,7 +79,7 @@ inspector. After that, in the 3rd longword, it contains the number of bytes accessible from the start of this filesystem. The 4th longword is the checksum of the first 512 bytes (or the number of bytes -accessible, whichever is smallest). The applied algorithm is the same +accessible, whichever is smaller). 
The applied algorithm is the same as in the AFFS filesystem, namely a simple sum of the longwords (assuming bigendian quantities again). For details, please consult the source. This algorithm was chosen because although it's not quite diff -u --recursive --new-file v2.1.98/linux/Documentation/filesystems/umsdos.txt linux/Documentation/filesystems/umsdos.txt --- v2.1.98/linux/Documentation/filesystems/umsdos.txt Wed Apr 8 19:36:24 1998 +++ linux/Documentation/filesystems/umsdos.txt Tue Apr 28 14:22:04 1998 @@ -14,13 +14,13 @@ It gives you: - long file name - Permissions and owner + long file names + Permissions and owners Links - Special files (devices, pipe...) - All is need to be a linux root fs. + Special files (devices, pipes...) + All that is needed to be a linux root fs. -There is plenty of documentation on it in the source. A formated document +There is plenty of documentation on it in the source. A formatted document made from those comments is available from sunsite.unc.edu:/pub/Linux/system/Filesystems/umsdos. @@ -32,21 +32,21 @@ ^ ---------| -All option are passed to the msdos drivers. Option like uid,gid etc are +All options are passed to the msdos drivers. Options like uid,gid etc are given to msdos. The default behavior of Umsdos is to do the same thing as the msdos driver mostly passing commands to it without much processing. Again, this is the default. After doing the mount on a DOS partition, nothing special -happen. This is why all mount options are passed to the Msdos fs driver. +happens. This is why all mount options are passed to the msdos fs driver. -Umsdos use a special DOS file --linux-.--- to store the information +Umsdos uses a special DOS file --linux-.--- to store the information which can't be handle by the normal MsDOS file system. This is the trick. --linux-.--- is optional. There is one per directory. **** If --linux-.--- is missing, then Umsdos process the directory the - same way the msdos driver do.
Short file name, no goodies, default + same way the msdos driver does. Short file names, no goodies, default owner and permissions. So each directory may have or not this --linux-.--- @@ -59,7 +59,7 @@ $5 per directory. Add any applicable taxes. \end joke_section -A utility umssync creates those. The kernel maintain them. It is available +A utility umssync creates those. The kernel maintains them. It is available from the same directory above (sunsite) in the file umsdos_progs-0.7.tar.gz. A compiled version is available in umsdos_progs-0.7.bin.tar.gz. @@ -69,20 +69,20 @@ This will promote this directory (a recursive option is available) to full umsdos capabilities (long name ...). A ls -l before and after won't show -much difference however. The file which were there are still there. But now +much difference however. The files which were there are still there. But now you can do all this: chmod 644 * - chown you.your_groupe * + chown you.your_group * ls >THIS_IS.A.VERY.LONG.NAME ln -s toto tata ls -l -Once a directory is promoted, all subdirectory created will inherit that +Once a directory is promoted, all subdirectories created will inherit that promotion. -What happen if you boot DOS and create files in those promoted directories ? -Umsdos won't notice new files, but will signal removed file (it won't crash). +What happens if you boot DOS and create files in those promoted directories ? +Umsdos won't notice new files, but will signal removed files (it won't crash). Using umssync in /etc/rc will make sure the DOS directory is in sync with the --linux-.---. @@ -95,8 +95,8 @@ (You put one for each umsdos mount point in the fstab) This will insure nice operation. A umsdos.fsck is in the making, -so you will be allowed to managed umsdos partition in the same way -other filesystem are, using the generic fsck front end. +so you will be allowed to manage umsdos partitions in the same way +other filesystems are, using the generic fsck front end. Hope this helps! 
diff -u --recursive --new-file v2.1.98/linux/Documentation/filesystems/vfat.txt linux/Documentation/filesystems/vfat.txt --- v2.1.98/linux/Documentation/filesystems/vfat.txt Sat Dec 20 21:33:20 1997 +++ linux/Documentation/filesystems/vfat.txt Tue Apr 28 14:22:04 1998 @@ -71,7 +71,7 @@ * vfat_valid_longname does not properly checked reserved names. * When a volume name is the same as a directory name in the root directory of the filesystem, the directory name sometimes shows - up empty an empty file. + up as an empty file. * autoconv option does not work correctly. BUG REPORTS @@ -103,7 +103,7 @@ The extended FAT file system is almost identical to the FAT file system used in DOS versions up to and including 6.223410239847 :-). The significant change has been the addition of long file names. -Theses names support up to 255 characters including spaces and lower +These names support up to 255 characters including spaces and lower case characters as opposed to the traditional 8.3 short names. Here is the description of the traditional FAT entry in the current @@ -142,7 +142,7 @@ legally fits within the old 8.3 encoding scheme does not have extra entries.) I call these extra entries slots. Basically, a slot is a specially formatted directory entry which holds up to 13 characters of -a files extended name. Think of slots as additional labeling for the +a file's extended name. Think of slots as additional labeling for the directory entry of the file to which they correspond. Microsoft prefers to refer to the 8.3 entry for a file as its alias and the extended slot directory entries as the file name. @@ -163,7 +163,7 @@ If the layout of the slots looks a little odd, it's only because of Microsoft's efforts to maintain compatibility with old software. The slots must be disguised to prevent old software from -panicing. To this end, a number of measures are taken: +panicking. 
To this end, a number of measures are taken: 1) The attribute byte for a slot directory entry is always set to 0x0f. This corresponds to an old directory entry with @@ -206,9 +206,9 @@ sum = (((sum&1)<<7)|((sum&0xfe)>>1)) + name[i] } - 3) If there is in the final slot, a Unicode NULL (0x0000) is stored - after the final character. After that, all unused characters in - the final slot are set to Unicode 0xFFFF. + 3) If there is free space in the final slot, a Unicode NULL (0x0000) + is stored after the final character. After that, all unused + characters in the final slot are set to Unicode 0xFFFF. Finally, note that the extended name is stored in Unicode. Each Unicode character takes two bytes. diff -u --recursive --new-file v2.1.98/linux/Documentation/filesystems/vfs.txt linux/Documentation/filesystems/vfs.txt --- v2.1.98/linux/Documentation/filesystems/vfs.txt Tue Mar 10 10:03:30 1998 +++ linux/Documentation/filesystems/vfs.txt Tue Apr 28 14:22:04 1998 @@ -5,7 +5,7 @@ Noone else seems to be writing this, so here's a quick description of what I've learned while writing lofs... -The VFS relatively simple, but it is nice not to have to browse through +The VFS is relatively simple, but it is nice not to have to browse through pages of code to determine what is expected when writing a filesystem. Hopefully this helps anyone attempting such a feat, as well as clearing up a few important points/dependencies. @@ -133,9 +133,9 @@ int (*follow_link) (struct inode *,struct inode *,int,int,struct inode **); [optional] - The follow_link function is only nescessary if a filesystem uses a really + The follow_link function is only necessary if a filesystem uses a really twisted form of symbolic links - namely if the symbolic link comes from a - foriegn filesystem that makes no sense.... + foreign filesystem that makes no sense.... I threw this one out - too much redundant code! 
int (*readpage) (struct inode *, struct page *); [optional] diff -u --recursive --new-file v2.1.98/linux/Documentation/ftape.txt linux/Documentation/ftape.txt --- v2.1.98/linux/Documentation/ftape.txt Tue Nov 25 14:45:26 1997 +++ linux/Documentation/ftape.txt Tue Apr 28 14:22:04 1998 @@ -6,7 +6,7 @@ document deals with ftape-3.04 and later. Please read the section "Changes" for the most striking differences between version 3.04 and 2.08; the latter was the version of ftape delivered with the kernel -until kernel version 2.030 and 2.1.57. ftape-3.x developed as the +until kernel version 2.0.30 and 2.1.57. ftape-3.x developed as the re-unification of ftape-2.x and zftape. zftape was developed in parallel with the stock ftape-2.x driver sharing the same hardware support but providing an enhanced file system interface. zftape also @@ -54,7 +54,7 @@ ============================== Unluckily, the ftape-HOWTO is out of date. This really needs to be -changed. Up to data documentation as well as recent development +changed. Up to date documentation as well as recent development versions of ftape and useful links to related topics can be found at the ftape home page at @@ -245,7 +245,7 @@ insmod ftape.o ft_tracing=4 or by editing the file `/etc/conf.modules' in which case they take - affect each time when the module is loaded with `modprobe' (please + effect each time when the module is loaded with `modprobe' (please refer to the modules documentation, i.e. `modules.txt' and the respective manual pages). Thus, you should add a line diff -u --recursive --new-file v2.1.98/linux/Documentation/hayes-esp.txt linux/Documentation/hayes-esp.txt --- v2.1.98/linux/Documentation/hayes-esp.txt Tue Feb 17 13:12:43 1998 +++ linux/Documentation/hayes-esp.txt Tue Apr 28 14:22:04 1998 @@ -51,7 +51,7 @@ irq=[0x100],[0x140],[0x180],[0x200],[0x240],[0x280],[0x300],[0x380] The address in brackets is the base address of the card. The IRQ of -nonexistant cards can be set to 0. 
If and IRQ of a card that does exist is set +nonexistent cards can be set to 0. If an IRQ of a card that does exist is set to 0, the driver will attempt to guess at the correct IRQ. For example, to set the IRQ of the card at address 0x300 to 12, the insmod command would be: diff -u --recursive --new-file v2.1.98/linux/Documentation/ide.txt linux/Documentation/ide.txt --- v2.1.98/linux/Documentation/ide.txt Sun Nov 30 13:48:41 1997 +++ linux/Documentation/ide.txt Tue Apr 28 14:22:04 1998 @@ -221,7 +221,7 @@ not making it to the host. Check how you have the hardware jumpered and make sure it matches what the driver expects (see the configuration instructions above). If you have a PCI system, also check the BIOS -setup; i've had one report of a system which was shipped with IRQ 15 +setup; I've had one report of a system which was shipped with IRQ 15 disabled by the BIOS. The kernel is able to execute binaries directly off of the cdrom, diff -u --recursive --new-file v2.1.98/linux/Documentation/isdn/INTERFACE linux/Documentation/isdn/INTERFACE --- v2.1.98/linux/Documentation/isdn/INTERFACE Wed Apr 1 20:11:47 1998 +++ linux/Documentation/isdn/INTERFACE Tue Apr 28 14:22:04 1998 @@ -53,7 +53,7 @@ ***CHANGE0.6: New since this version. Also to be preset by the HL-driver. With this value the HL-driver - tells to the LL the maximum size of a data-packet it will accept. + tells the LL the maximum size of a data-packet it will accept. unsigned long features; @@ -70,8 +70,8 @@ ***CHANGE0.7.4: New field. To be preset by the HL-driver, if it supports sk_buff's. The driver - should put here the amount of additional space needed in sk-buff's for - its internal purposes. Drivers not supporting sk_buff's should put + should put here the amount of additional space needed in sk_buff's for + its internal purposes. Drivers not supporting sk_buff's should initialize this field to 0. 
void (*rcvcallb_skb)(int, int, struct sk_buff *) @@ -211,7 +211,7 @@ All commands will be performed by calling the function command() described above from within the LL. The field command of the struct-parameter will - contain the desired command, the field driver always is set to the + contain the desired command, the field driver is always set to the appropriate driver-Id. Until now, the following commands are defined: @@ -436,7 +436,7 @@ arg = unused. para = unused. -3. Description of the events to be signaled by the HL-driver to th LL. +3. Description of the events to be signaled by the HL-driver to the LL. All status-changes are signaled via calling the previously described function statcallb(). The field command of the struct isdn_cmd has @@ -520,7 +520,7 @@ remote-station has initiated establishment) The HL driver should call this when the logical l2/l3 protocol - connection on top of the physical B-channel is esatblished . + connection on top of the physical B-channel is established. Parameter: driver = driver-Id @@ -624,7 +624,7 @@ With this call, the HL-driver delivers CAUSE-messages to the LL. Currently the LL does not use this messages. Their contents is simply logged via kernel-messages. Therefore, currently the format of the - messages is currently completely free. However they should be printable. + messages is completely free. However they should be printable. Parameter: driver = driver-Id diff -u --recursive --new-file v2.1.98/linux/Documentation/isdn/README linux/Documentation/isdn/README --- v2.1.98/linux/Documentation/isdn/README Wed Apr 1 20:11:47 1998 +++ linux/Documentation/isdn/README Tue Apr 28 14:22:04 1998 @@ -62,7 +62,7 @@ read: raw D-channel-messages (format: depends on driver). ioctl: depends on driver, i.e. 
for the ICN-driver, the base-address of the ports and the shared memory on the card can be set and read - also the boot-code an the protocol software can be loaded into + also the boot-code and the protocol software can be loaded into the card. O N L Y !!! for debugging (no locking against other devices): @@ -74,7 +74,7 @@ 128 tty-devices (64 cuix and 64 ttyIx) with integrated modem-emulator: The functionality is almost the same as that of a serial device - (the line-discs are handled by the kernel, which lets you run + (the line-discs are handled by the kernel), which lets you run SLIP, CSLIP and asynchronous PPP through the devices. We have tested Seyon, minicom, CSLIP (uri-dip) PPP and mgetty (compiled with NO_FAX), XCept. @@ -96,7 +96,7 @@ ATI Return "ISDN for Linux...". ATI0 " ATI1 " - ATI2 Report of last connection. + ATI2 Report of last connection. ATO On line (data mode). ATQ0 Enable result codes (default). ATQ1 Disable result codes (default). @@ -107,9 +107,9 @@ ATZ Load registers and EAZ/MSN from Profile. AT&Bx Set Send-Packet-size to x (max. 4000) The real packet-size may be limited by the - low-level-driver used. i.e.: the HiSax-Module- + low-level-driver used. e.g. the HiSax-Module- limit is 2000. You will get NO Error-Message, - if you set it to higher Values, because at the + if you set it to higher values, because at the time of giving this command the corresponding driver may not be selected (see "Automatic Assignment") however the size of outgoing packets @@ -245,7 +245,7 @@ 19 0 Service-Octet-2 20 0 Bit coded register (readonly) Service-Octet-1 of last call. - Bit mapping is the same like register 18 + Bit mapping is the same as register 18 21 0 Bit coded register (readonly) Set on incoming call (during RING) to octet 3 of calling party number IE (Numbering plan) @@ -263,17 +263,17 @@ All inactive physical lines are listening to all EAZs for incoming calls and are NOT assigned to a specific tty or network interface. 
When an incoming call is detected, the driver looks first for a network - interfaces and then for an opened tty which: + interface and then for an opened tty which: 1. is configured for the same EAZ. 2. has the same protocol settings for the B-channel. 3. (only for network interfaces if the security flag is set) contains the caller number in its access list. 4. Either the channel is not bound exclusively to another Net-interface, or - it is bound AND the other checks apply to exact this Interface. + it is bound AND the other checks apply to exactly this Interface. (For usage of the bind-features, refer to the isdnctrl-man-page) - Only when a matching interface or tty is found, the call is accepted + Only when a matching interface or tty is found is the call accepted and the "connection" between the low-level-layer and the link-level-layer is established and kept until the end of the connection. In all other cases no connection is established. Isdn4linux can be @@ -309,7 +309,7 @@ 4. Device-inodes - The major and minor-numbers and its names are described in + The major and minor numbers and their names are described in Documentation/devices.txt. The major-numbers are: 43 for the ISDN-tty's. @@ -357,7 +357,7 @@ i) Setup the interface with ifconfig as usual, and set a route to it. - j) (optional) If you run X11 and have Tcl/Tk-wish Version4.0, you can use + j) (optional) If you run X11 and have Tcl/Tk-wish Version 4.0, you can use the script tools/tcltk/isdnmon. You can add actions for line-status changes. See the comments at the beginning of the script for how to do that. There are other tty-based tools in the tools-subdirectory @@ -399,7 +399,7 @@ "isdnctrl secure off" - Switch of secure operation (default). + Switch off secure operation (default). "isdnctrl ihup [on|off]" Switch the hang-up-timer for incoming calls on or off. @@ -434,15 +434,15 @@ Selects the type of packet-encapsulation. The encapsulation can be changed only while an interface is down. 
- At the moment th following Values are supported: + At the moment the following values are supported: rawip (Default) Selects raw-IP-encapsulation. This means, MAC-headers are stripped off. ip IP with type-field. Same as IP but the type-field of the MAC-header is preserved. - x25iface x25 interface encapsulation (first byte semantics as defined in + x25iface X.25 interface encapsulation (first byte semantics as defined in ../networking/x25-iface.txt). Use this for running the linux - x25 network protocol stack (AF_X25 sockets) on top of isdn. + X.25 network protocol stack (AF_X25 sockets) on top of isdn. cisco-h A special-mode for communicating with a Cisco, which is configured to do "hdlc" ethernet No stripping. Packets are sent with full MAC-header. @@ -483,7 +483,7 @@ dial out using a specific Card or even preserve a specific Channel for Dialout of a specific net-interface. This can be done with the above command. Replace by whatever you assigned while loading the - module. The is counting from zero. the upper Limit + module. The is counting from zero. The upper Limit depends on the card used. At the Moment no card supports more than 2 Channels, so the upper limit is one. diff -u --recursive --new-file v2.1.98/linux/Documentation/isdn/README.HiSax linux/Documentation/isdn/README.HiSax --- v2.1.98/linux/Documentation/isdn/README.HiSax Wed Apr 1 20:11:47 1998 +++ linux/Documentation/isdn/README.HiSax Tue Apr 28 14:22:04 1998 @@ -66,7 +66,7 @@ with LILO or LOADLIN or, if built as a module, using insmod/modprobe with parameters. There is also some config needed before you compile the kernel and/or -modules. It is enclose in the normal "make [menu]config" target at the +modules. It is included in the normal "make [menu]config" target at the kernel. Don't forget it, especially to select the right D-channel protocol. Please note: All PnP cards need to be configured with isapnp and will work @@ -152,7 +152,7 @@ At the moment IRQ sharing is not possible. 
Please make sure that your IRQ is free and enabled for ISA use. Note: For using the ELSA PCMCIA you need the cardmanager under MSDOS for -enabling in the moment, then boot linux with loadlin. +enabling at the moment, then boot linux with loadlin. Examples for module loading @@ -272,7 +272,7 @@ hisaxctrl DebugCmd - default is HiSax, if you didn't specified one. + default is HiSax, if you didn't specify one. DebugCmd is 1 for generic debugging 11 for layer 1 development debugging @@ -309,18 +309,18 @@ With DebugCmd set to 13: 1 Warnings (default: on) - 2 l3 protocol discriptor errors + 2 l3 protocol descriptor errors 4 l3 state machine 8 charge info debugging (1TR6) For example, 'hisaxctrl HiSax 1 0x3ff' enables full generic debugging. Because of some obscure problems with some switch equipment, the delay -between CONNECT message and sending the first data on th B-channel is now +between the CONNECT message and sending the first data on the B-channel is now configurable with hisaxctrl 2 - in ms Value between 50 an 800 ms are recommended. + in ms Value between 50 and 800 ms is recommended. Warning @@ -389,7 +389,7 @@ Attention NEW VERSION, the old leased line syntax won't work !!! You can use HiSax to connect your Linux-Box via an ISDN leased line -to i.e. the internet: +to e.g. the Internet: 1. Build a kernel which includes the HiSax driver either as a module or as part of the kernel. @@ -407,7 +407,7 @@ vi /etc/lilo.conf lilo - Your lilo.conf _might_ look as the following: + Your lilo.conf _might_ look like the following: # LILO configuration-file # global section @@ -449,7 +449,7 @@ /sbin/isdnctrl secure isdn0 on /sbin/isdnctrl huptimeout isdn0 0 /sbin/isdnctrl l2_prot isdn0 hdlc - # Attention you must not set a outgoing number !!! This won't work !!! + # Attention you must not set an outgoing number !!! This won't work !!! # The incomming number is LEASED0 for the first card, LEASED1 for the # second and so on. 
/sbin/isdnctrl addphone isdn0 in LEASED0 @@ -465,7 +465,7 @@ /sbin/hisaxctrl HiSax 5 1 Remarks: -a) If you have a CISCO don´t forget to switch off the KEEP ALIVE option! +a) If you have a CISCO don't forget to switch off the KEEP ALIVE option! Here an example script: #!/bin/sh diff -u --recursive --new-file v2.1.98/linux/Documentation/isdn/README.act2000 linux/Documentation/isdn/README.act2000 --- v2.1.98/linux/Documentation/isdn/README.act2000 Wed Apr 1 20:11:47 1998 +++ linux/Documentation/isdn/README.act2000 Tue Apr 28 14:22:04 1998 @@ -36,7 +36,7 @@ 3, 5, 7, 10, 11, 12, 15 and none (polled mode) -The ACT2000 driver either may be build into kernel or as a module. +The ACT2000 driver may either be built into the kernel or as a module. Initialization depends on how the driver is built: Driver built into the kernel: @@ -78,11 +78,11 @@ act_bus=b act_port=p act_irq=i act_id=idstring - where b, p, i and idstring have the same meanings like parameters + where b, p, i and idstring have the same meanings as the parameters described for the builtin version above. Using the "actctrl"-utility, the same features apply to the modularized - version like to the kernel-builtin one. (i.e. loading of firmware and + version as to the kernel-builtin one. (i.e. loading of firmware and configuring the D-channel protocol) Loading the firmware into the card: @@ -90,7 +90,7 @@ The firmware is supplied together with the isdn4k-utils package. 
It can be found in the subdirectory act2000/firmware/ - Assumed you have installed the utility-package correctly, the firmware + Assuming you have installed the utility-package correctly, the firmware will be downloaded into the card using the following command: actctrl -d idstring load /etc/isdn/bip11.btl diff -u --recursive --new-file v2.1.98/linux/Documentation/isdn/README.audio linux/Documentation/isdn/README.audio --- v2.1.98/linux/Documentation/isdn/README.audio Thu May 29 21:53:03 1997 +++ linux/Documentation/isdn/README.audio Tue Apr 28 14:22:04 1998 @@ -22,7 +22,7 @@ Commands supported in audio mode: -All audio mode commands have the one of the following form: +All audio mode commands have one of the following forms: AT+Vxx? Show current setting. AT+Vxx=? Show possible settings. @@ -89,8 +89,8 @@ End of audio data. (i.e. caused by a hangup of the remote side) Emulator stops recording, responding with VCON. - Abort recording, (send by appl.) Emulator - stops recording, sends DLE,ETX. + Abort recording, (send by appl.) Emulator + stops recording, sends DLE,ETX. Escape sequence for DLE in data stream. 0 Touchtone "0" received. ... diff -u --recursive --new-file v2.1.98/linux/Documentation/isdn/README.avmb1 linux/Documentation/isdn/README.avmb1 --- v2.1.98/linux/Documentation/isdn/README.avmb1 Thu May 29 21:53:03 1997 +++ linux/Documentation/isdn/README.avmb1 Tue Apr 28 14:22:04 1998 @@ -26,7 +26,7 @@ AVM GmbH provides several t4-files for the different D-channel protocols (b1.t4 for Euro-ISDN). Install these file in /lib/isdn. 
-If you not compile the driver as modules, you have to add the +If you do not compile the driver as modules, you have to add the card(s) and load them after booting: avmcapictrl add 0x150 15 diff -u --recursive --new-file v2.1.98/linux/Documentation/isdn/README.concap linux/Documentation/isdn/README.concap --- v2.1.98/linux/Documentation/isdn/README.concap Wed Apr 1 20:11:47 1998 +++ linux/Documentation/isdn/README.concap Tue Apr 28 14:22:04 1998 @@ -10,7 +10,7 @@ This is currently only used inside the isdn subsystem. But it might also be useful to other kinds of network devices. Thus, if you want -to suggest changes that improve usability or performace of the +to suggest changes that improve usability or performance of the interface, please let me know. I'm willing to include them in future releases (even if I needed to adapt the current isdn code to the changed interface). @@ -25,14 +25,14 @@ several different encapsulation protocols at once. The isdn device driver did already support several different -encapsulation protocols. The encapsulation protocol is configuered by a +encapsulation protocols. The encapsulation protocol is configured by a user space utility (isdnctrl). The isdn network interface code then uses several case statements which select appropriate actions -depending on the currently configuered encapsulation protocol. +depending on the currently configured encapsulation protocol. In contrast, LAN network interfaces always used a single encapsulation protocol which is unique to the hardware type of the interface. The LAN -encapsulation is usually done by just sticking a header at the data. Thus, +encapsulation is usually done by just sticking a header on the data. 
Thus, traditional linux network device drivers used to process the encapsulation protocol directly (usually by just providing a hard_header() method in the device structure) using some hardware type specific support @@ -46,13 +46,13 @@ Many Encapsulation protocols used on top of WAN connections will not just -stick a header at the data. They also might need to set up or release +stick a header on the data. They also might need to set up or release the WAN connection. They also might want to send other data for their -private purpose over the wire. I.e. ppp does a lot of link level -negotiation before the first peace of user data can be transmitted. +private purpose over the wire, e.g. ppp does a lot of link level +negotiation before the first piece of user data can be transmitted. Such encapsulation protocols for WAN devices are typically more complex -than encapsulation protocols for lan devices. Thus, network interfaces -code for typical WAN devices also tends to be more more complex. +than encapsulation protocols for lan devices. Thus, network interface +code for typical WAN devices also tends to be more complex. In order to support Linux' x25 PLP implementation on top of @@ -65,22 +65,22 @@ increased. -Likewise, a same encapsulation protocol will frequently be needed by -several different interfaces of even different hardware type. I.e. the -synchronous ppp implementaion used by the isdn driver and the -asyncronous ppp implemntation used by the ppp driver have a lot of +Likewise, a similar encapsulation protocol will frequently be needed by +several different interfaces of even different hardware type, e.g. the +synchronous ppp implementation used by the isdn driver and the +asynchronous ppp implementation used by the ppp driver have a lot of similar code in them. By cleanly separating the encapsulation protocol from the hardware specific interface stuff such code could be shared better in future. -When operating over dial-up-connections (i.e.
telephone lines via modem, +When operating over dial-up-connections (e.g. telephone lines via modem, non-permanent virtual circuits of wide area networks, ISDN) many -encapsulation protocols will need to control the connection. Therfore, +encapsulation protocols will need to control the connection. Therefore, some basic connection control primitives are supported. The type and semantics of the connection (i.e the ISO layer where connection service is provided) is outside our scope and might be different depending on -the encapsulation protocol used. I.e. for a ppp module using our service +the encapsulation protocol used, e.g. for a ppp module using our service on top of a modem connection a connect_request will result in dialing a (somewhere else configured) remote phone number. For an X25-interface module (LAPB semantics, as defined in Documentation/networking/x25-iface.txt) @@ -88,7 +88,7 @@ datalink connection. -The encapsulation protocol currently provides the follwing +The encapsulation protocol currently provides the following service primitives to the network device. - create a new encapsulation protocol instance @@ -121,7 +121,7 @@ struct device *ndev, struct concap_device_ops *dops); - /* inactivate an encapsulation protocol instance. The encapsulation + /* deactivate an encapsulation protocol instance. The encapsulation protocol may not call any *dops methods after this. 
*/ int (*close)(struct concap_proto *cprot); @@ -145,24 +145,24 @@ A Network interface using encapsulation protocols must also provide some service primitives to the encapsulation protocol: -- request data beeing submitted by lower layer (device hardware) -- request a connection beeing set up by lower layer -- request a connection beeing released by lower layer +- request data being submitted by lower layer (device hardware) +- request a connection being set up by lower layer +- request a connection being released by lower layer -The encapsulations protocol accesses those primitives via callbacks +The encapsulation protocol accesses those primitives via callbacks provided by the network interface within a struct concap_device_ops. struct concap_device_ops{ - /* to request data is submitted by device*/ + /* to request data be submitted by device */ int (*data_req)(struct concap_proto *, struct sk_buff *); /* Control methods must be set to NULL by devices which do not - support connection control.*/ - /* to request a connection is set up */ + support connection control. */ + /* to request a connection be set up */ int (*connect_req)(struct concap_proto *); - /* to request a connection is released */ + /* to request a connection be released */ int (*disconn_req)(struct concap_proto *); }; @@ -172,7 +172,7 @@ -An encapsulation protocol itsself is actually the +An encapsulation protocol itself is actually the struct concap_proto{ struct device *net_dev; /* net device using our service */ struct concap_device_ops *dops; /* callbacks provided by device */ @@ -189,7 +189,7 @@ Most of this is filled in when the device requests the protocol to be reset (opend). The network interface must provide the net_dev and -dops pointers. Other concap_proto members should be considerd private +dops pointers. Other concap_proto members should be considered private data that are only accessed by the pops callback functions. 
Likewise, a concap proto should access the network device's private data only by means of the callbacks referred to by the dops pointer. @@ -217,21 +217,21 @@ reduce the complexity of certain network interface implementations. The trade off is that it introduces yet another procedure call layer when processing the protocol. This has of course some impact on -performace. However, typically the concap interface will be used by +performance. However, typically the concap interface will be used by devices attached to slow lines (like telephone, isdn, leased synchronous -lines). For such slow lines, the overhead is probably neglectable. +lines). For such slow lines, the overhead is probably negligible. This might no longer hold for certain high speed WAN links (like ATM). If general linux network interfaces explicitly supported concap -protocols (i.e. by a member struct concap_proto* in struct device) +protocols (e.g. by a member struct concap_proto* in struct device) then the interface of the service function could be changed by passing a pointer of type (struct device*) instead of type (struct concap_proto*). Doing so would make many of the service -functions compatible to network device support fuctions. i.e. +functions compatible to network device support functions. -i.e. instead of the concap protocol's service function +e.g. instead of the concap protocol's service function int (*encap_and_xmit)(struct concap_proto *cprot, struct sk_buff *skb); @@ -252,7 +252,7 @@ This might even allow for some protocol stacking. And the network interface might even register the same data_req() function directly as its hard_start_xmit() method when a zero layer encapsulation -protocol is configured. Thus, eliminating the performace penalty +protocol is configured. Thus, eliminating the performance penalty of the concap interface when a trivial concap protocol is used. Nevertheless, the device remains able to support encapsulation protocol configuration.
diff -u --recursive --new-file v2.1.98/linux/Documentation/isdn/README.icn linux/Documentation/isdn/README.icn --- v2.1.98/linux/Documentation/isdn/README.icn Thu May 29 21:53:03 1997 +++ linux/Documentation/isdn/README.icn Tue Apr 28 14:22:04 1998 @@ -62,8 +62,8 @@ 1 1 1 0 0x368 1 1 1 1 NOT ALLOWED! -The ICN driver either may be build into kernel or as a module. Initialization -depends on how the drive is built: +The ICN driver may be built into the kernel or as a module. Initialization +depends on how the driver is built: Driver built into the kernel: @@ -102,7 +102,7 @@ portbase=p membase=m icn_id=idstring [icn_id2=idstring2] - where p, m, idstring1 and idstring2 have the same meanings like + where p, m, idstring1 and idstring2 have the same meanings as the parameters described for the kernel-version above. When using the ICN double card (4B), you MUST define TWO idstrings. @@ -127,12 +127,12 @@ pc_1t_ca.bin - Image of firmware for german 1TR6 protocol. pc_eu_ca.bin - Image if firmware for EDSS1 (Euro-ISDN) protocol. - Assumed you have installed the utility-package correctly, the firmware + Assuming you have installed the utility-package correctly, the firmware will be downloaded into the 2B-card using the following command: icnctrl -d Idstring load /etc/isdn/loadpg.bin /etc/isdn/pc_XX_ca.bin - where XX is either "1t" or "eu", depending of the D-Channel protocol + where XX is either "1t" or "eu", depending on the D-Channel protocol used on your S0-bus and Idstring is the Name of the card, given during insmod-time or (for kernel-builtin driver) on the kernel commandline. 
diff -u --recursive --new-file v2.1.98/linux/Documentation/isdn/README.pcbit linux/Documentation/isdn/README.pcbit --- v2.1.98/linux/Documentation/isdn/README.pcbit Tue Apr 23 02:31:46 1996 +++ linux/Documentation/isdn/README.pcbit Tue Apr 28 14:22:04 1998 @@ -16,20 +16,20 @@ Known Limitations: -- The board reset proceeding is at the moment incorrect and will only +- The board reset procedure is at the moment incorrect and will only allow you to load the firmware after a hard reset. -- Only HDLC in B-channels is supported at the moment. There is now -current support to X.25 in B or D channels nor LAPD in B -channels. The main reason is that this two other protocol modes have, +- Only HDLC in B-channels is supported at the moment. There is no +current support for X.25 in B or D channels nor LAPD in B +channels. The main reason is that these two other protocol modes have, to my knowledge, very little use. If you want to see them implemented *do* send me a mail. -- The driver often triggers errors in the board that i and the +- The driver often triggers errors in the board that I and the manufacturer believe to be caused by bugs in the firmware. The current -version includes several proceedings for error recovery that should +version includes several procedures for error recovery that should allow normal operation. Plans for the future include cooperation with -the manufacturer in order to solve this problems. +the manufacturer in order to solve this problem. Information/hints/help can be obtained in the linux isdn mailing list (isdn4linux@hub-wue.franken.de) or directly from me. diff -u --recursive --new-file v2.1.98/linux/Documentation/isdn/README.sc linux/Documentation/isdn/README.sc --- v2.1.98/linux/Documentation/isdn/README.sc Thu Feb 27 10:57:29 1997 +++ linux/Documentation/isdn/README.sc Tue Apr 28 14:22:04 1998 @@ -9,7 +9,7 @@ bugs and defects either known or unknown. Use this software at your own risk. There is NO SUPPORT for this software. 
Some help may be available through the web site or the mailing list but such support is totally at -our own option and without warrantee. If you choose to assume all and +our own option and without warranty. If you choose to assume all and total risk by using this driver, we encourage you to join the beta mailing list. @@ -17,7 +17,7 @@ majordomo@spellcast.com with the words "subscribe linux-beta" as the only contents of the message. Do not include a signature. If you choose to remove yourself from this list at a later date, send another message to -the same address with the words "unsubscribe linux-beta" as it's only +the same address with the words "unsubscribe linux-beta" as its only contents. TABLE OF CONTENTS @@ -42,7 +42,7 @@ --------------- The revision 2 Linux driver for SpellCaster ISA ISDN adapters is built -upon ISDN4Linux available seperately or as included in Linux 2.0 and later. +upon ISDN4Linux available separately or as included in Linux 2.0 and later. The driver will support a maximum of 4 adapters in any one system of any type including DataCommute/BRI, DataCommute/PRI and TeleCommute/BRI for a maximum of 92 channels for host. The driver is supplied as a module in @@ -74,14 +74,14 @@ allow us to utilize all of the available RAM on the adapter through only one 16K page. - Better detection of available upper memory. The probing routines - have been improved to better detect avaialble shared RAM pages and + have been improved to better detect available shared RAM pages and used pages are now locked. - Decreased loading time and a wider range of I/O ports probed. We have significantly reduced the amount of time it takes to load the driver and at the same time doubled the number of I/O ports - probed increasing the likelyhood of finding an adapter. + probed increasing the likelihood of finding an adapter. - We now support all ISA adapter models with a single driver instead - of seperate drivers for each model. 
The revision 2 driver supports + of separate drivers for each model. The revision 2 driver supports the DataCommute/BRI, DataCommute/PRI and TeleCommute/BRI in any combination up to a maximum of four adapters per system. - On board PPP protocol support has been removed in favour of the @@ -115,7 +115,7 @@ 2.1 Unpacking and installing the driver - 1. As root, create a directory in a convienient place. We suggest + 1. As root, create a directory in a convenient place. We suggest /usr/src/spellcaster. 2. Unpack the archive with : @@ -170,36 +170,38 @@ 2.6 How to setup ISDN4Linux with the driver -There are two main configurations which you can use with the driver: +There are three main configurations which you can use with the driver: A) Basic HDLC connection B) PPP connection C) MLPPP connection -It should be mentioned here that you may also use a tty connection if you desire. -The Documentation directory of the isdn4linux subsystem offers a good documentation -on this feature. +It should be mentioned here that you may also use a tty connection if you +desire. The Documentation directory of the isdn4linux subsystem offers good +documentation on this feature. A) 10 steps to the establishment of a basic HDLC connection ----------------------------------------------------------- - please open the isdn-hdlc file in the examples directory and follow along... - This file is a script used to configure a BRI ISDN TA to establish a basic HDLC - connection between its two channels. There two network interfaces which are - created and two routes added between the channels. + This file is a script used to configure a BRI ISDN TA to establish a + basic HDLC connection between its two channels. Two network + interfaces are created and two routes added between the channels. 
- i) using the isdnctrl utitity, add an interface with "addif" and name it "isdn0" + i) using the isdnctrl utility, add an interface with "addif" and + name it "isdn0" ii) add the outgoing and inbound telephone numbers iii) set the Layer 2 protocol to hdlc - iv) set the eaz of the interface to be the phone number of that specific channel + iv) set the eaz of the interface to be the phone number of that + specific channel v) to turn the callback features off, set the callback to "off" and the callback delay (cbdelay) to 0. vi) the hangup timeout can be set to a specified number of seconds - vii) the hangup upon incomming call can be set on or off - viii) use the ifconfig command to bring-up the network interface with a specific - IP address and point to point address - viv) add a route to the IP address through the isdn0 interface + vii) the hangup upon incoming call can be set on or off + viii) use the ifconfig command to bring up the network interface with + a specific IP address and point to point address + ix) add a route to the IP address through the isdn0 interface x) a ping should result in the establishment of the connection @@ -208,13 +210,15 @@ - please open the isdn-ppp file in the examples directory and follow along... - This file is a script used to configure a BRI ISDN TA to establish a PPP connection - between the two channels. The file is almost identical to the HDLC connection - example except that the packet ecapsulation type has to be set. - - use the same procedure as in the HDLC connection from steps i) to iii) then, - after the Layer 2 protocol is set, set the encapsulation "encap" to syncppp. - With this done, the rest of the steps, iv) to x) can be followed from above. + This file is a script used to configure a BRI ISDN TA to establish a + PPP connection between the two channels. The file is almost + identical to the HDLC connection example except that the packet + encapsulation type has to be set.
+ + use the same procedure as in the HDLC connection from steps i) to + iii) then, after the Layer 2 protocol is set, set the encapsulation + "encap" to syncppp. With this done, the rest of the steps, iv) to x) + can be followed from above. Then, the ipppd (ippp daemon) must be setup: @@ -223,52 +227,55 @@ xiii) set the mru size to 2000 xiv) link the two /dev interfaces to the daemon -NOTE: A "*" in the inbound telephone number specifies that a call can be accepted - on any number. +NOTE: A "*" in the inbound telephone number specifies that a call can be +accepted on any number. C) Establishment of a MLPPP connection -------------------------------------- - please open the isdn-mppp file in the examples directory and follow along... - This file is a script used to configure a BRI ISDN TA to accept a Multi Link PPP - connection. + This file is a script used to configure a BRI ISDN TA to accept a + Multi Link PPP connection. - i) using the isdnctrl utitity, add an interface with "addif" and name it "ippp0" + i) using the isdnctrl utility, add an interface with "addif" and + name it "ippp0" ii) add the inbound telephone number - iii) set the Layer 2 protocol to hdlc and the Layer 3 protocol to trans (transparent) + iii) set the Layer 2 protocol to hdlc and the Layer 3 protocol to + trans (transparent) iv) set the packet encapsulation to syncppp - v) set the eaz of the interface to be the phone number of that specific channel - vi) to turn the callback features off, set the callback to "off" and + v) set the eaz of the interface to be the phone number of that + specific channel + vi) to turn the callback features off, set the callback to "off" and the callback delay (cbdelay) to 0.
vi) the hangup timeout can be set to a specified number of seconds - vii) the hangup upon incomming call can be set on or off + vii) the hangup upon incoming call can be set on or off viii) add a slave interface and name it "ippp32" for example - viv) set the similar parameters for the ippp32 interface - x) use the ifconfig command to bring-up the ippp0 interface with a specific - IP address and point to point address + ix) set the similar parameters for the ippp32 interface + x) use the ifconfig command to bring-up the ippp0 interface with a + specific IP address and point to point address xi) add a route to the IP address through the ippp0 interface xii) use the ipppd function found in /sbin/ipppd to set the following: xiii) take out (minus) bsd compression xiv) set the mru size to 2000 xv) add (+) the multi-link function "+mp" - xv) link the two /dev interfaces to the daemon + xvi) link the two /dev interfaces to the daemon -NOTE: To use the MLPPP connection to dial OUT to a MLPPP connection, change the - inbound telephone numbers to the outgoing telephone numbers of the MLPPP - host. +NOTE: To use the MLPPP connection to dial OUT to a MLPPP connection, change +the inbound telephone numbers to the outgoing telephone numbers of the MLPPP +host. 3. Beta Change Summaries and Miscellaneous Notes ------------------------------------------------ -When using the "scctrl" utility to upload firmware revisions on the board, please -note that the byte count displayed at the end of the operation may be different -than the total number of bytes in the "dcbfwn.nn.sr" file. Please disregard the -displayed byte count. - -It was noted that in Beta Release 1, the module would fail to load and result in a -segmentation fault when insmod"ed". This problem was created when one of the -isdn4linux parameters, (isdn_ctrl, data field) was filled in. In some cases, this -data field was NULL, and was left unchecked, so when it was referenced.. segv. 
-The bug has been fixed around line 63-68 of event.c. +When using the "scctrl" utility to upload firmware revisions on the board, +please note that the byte count displayed at the end of the operation may be +different from the total number of bytes in the "dcbfwn.nn.sr" file. Please +disregard the displayed byte count. + +It was noted that in Beta Release 1, the module would fail to load and result +in a segmentation fault when 'insmod'ed. This problem was created when one of +the isdn4linux parameters, (isdn_ctrl, data field) was filled in. In some +cases, this data field was NULL, and was left unchecked, so when it was +referenced... segv. The bug has been fixed around line 63-68 of event.c. diff -u --recursive --new-file v2.1.98/linux/Documentation/isdn/README.x25 linux/Documentation/isdn/README.x25 --- v2.1.98/linux/Documentation/isdn/README.x25 Wed Apr 1 20:11:47 1998 +++ linux/Documentation/isdn/README.x25 Tue Apr 28 14:22:04 1998 @@ -3,7 +3,7 @@ This is experimental code and should be used with linux version 2.1.72. -or later. Use it completely on your own risk. +or later. Use it completely at your own risk. As new versions appear, the stuff described here might suddenly change @@ -161,7 +161,7 @@ x25route add 01 will cause all x.25 connections to the destination x.25-address -"01" beeing routed to your created isdn network interface. +"01" to be routed to your created isdn network interface. There are currently no real x25 applications available. However, for @@ -185,14 +185,14 @@ This will set up a sample configuration using the isdnloop and hisax driver and create some isdn network interfaces. It is recommended that all other isdn drivers and the -x25 module is unloaded before calling this script. +x25 module are unloaded before calling this script. Known problems and deficiencies: The isdnloop HL driver apparently has problems to re-establish a -connection that has been hang up from the outgoing device. 
You have to +connection that has been hung up from the outgoing device. You have to unload the isdnloop driver after the faked isdn-connection is closed and insmod it again. With the Hisax driver, this problem is not present. @@ -210,7 +210,7 @@ isdnloop driver. It seems that it is not caused by the isdn code. Somehow, the inode of a socket is freed while a process still refers the socket's wait queue. This causes problems when the process tries to -remove itsself from the wait queue (refered by the dangling +remove itself from the wait queue (referred by the dangling sock->sleep pointer) before returning from a select() system call. - Henner diff -u --recursive --new-file v2.1.98/linux/Documentation/isdn/syncPPP.FAQ linux/Documentation/isdn/syncPPP.FAQ --- v2.1.98/linux/Documentation/isdn/syncPPP.FAQ Sun May 19 22:38:42 1996 +++ linux/Documentation/isdn/syncPPP.FAQ Tue Apr 28 14:22:04 1998 @@ -1,8 +1,8 @@ simple isdn4linux PPP FAQ .. to be continued .. not 'debugged' ------------------------------------------------------------------- -Q01: what's pppd,ipppd, syncPPP , asyncPPP ?? -Q02: error message "this systems lacks PPP support" +Q01: what's pppd, ipppd, syncPPP, asyncPPP ?? +Q02: error message "this system lacks PPP support" Q03: strange information using 'ifconfig' Q04: MPPP?? What's that and how can I use it ... Q05: I tried MPPP but it doesn't work @@ -16,7 +16,7 @@ ------------------------------------------------------------------- -Q01: pppd,ipppd, syncPPP , asyncPPP .. what is that ? +Q01: pppd, ipppd, syncPPP, asyncPPP .. what is that ? what should I use? A: The pppd is for asynchronous PPP .. asynchronous means here, the framing is character based. (e.g when @@ -45,7 +45,7 @@ -- Q02: when I start the ipppd .. I only get the - error message "this systems lacks PPP support" + error message "this system lacks PPP support" A: check that at least the device 'ippp0' exists.
(you can check this e.g with the program 'ifconfig') The ipppd NEEDS this device under THIS name .. @@ -123,7 +123,7 @@ -- -Q08: A wanna talk to remote machines, which need +Q08: I wanna talk to remote machines, which need a different configuration. The only way I found to do this is to kill the ipppd and start a new one with another config to connect @@ -152,14 +152,14 @@ Q10: I wanna use dynamic IP address assignment ... How must I configure the network device. -A: At least you must have a routing, which forwards +A: At least you must have a route which forwards a packet to the ippp network-interface to trigger the dial-on-demand. - A default routing to the ippp-interface will work. + A default route to the ippp-interface will work. Now you must choose a dummy IP address for your interface. If for some reason you can't set the default - routing to the ippp interface, you may take any + route to the ippp interface, you may take any address of the subnet from which you expect your dynamic IP number and set a 'network route' for this subnet to the ippp interface. diff -u --recursive --new-file v2.1.98/linux/Documentation/java.txt linux/Documentation/java.txt --- v2.1.98/linux/Documentation/java.txt Fri Jan 23 18:10:31 1998 +++ linux/Documentation/java.txt Tue Apr 28 14:22:04 1998 @@ -18,7 +18,7 @@ nonstandard classes (not included in the same directory as the application itself). -2) You have to compile BINFMT_MISC either as module or into +2) You have to compile BINFMT_MISC either as a module or into the kernel (CONFIG_BINFMT_MISC) and set it up properly. If you choose to compile it as a module, you will have to insert it manually with modprobe/insmod, as kerneld diff -u --recursive --new-file v2.1.98/linux/Documentation/joystick.txt linux/Documentation/joystick.txt --- v2.1.98/linux/Documentation/joystick.txt Wed Apr 8 19:36:24 1998 +++ linux/Documentation/joystick.txt Tue Apr 28 14:22:04 1998 @@ -15,7 +15,7 @@ 2. 
Usage ~~~~~~~~ If you enable the joystick driver in the kernel configuration, all -connected joystick should be found automatically. If that doesn't work, you +connected joysticks should be found automatically. If that doesn't work, you can pass the joystick driver the following kernel command line arguments: js=0xXX,0xYY @@ -120,7 +120,7 @@ port started ALL the joystick one shots. If the one that we are reading is short enough and the first one to be read, the second one will return - bad data if it's one shot has not expired when + bad data if its one shot has not expired when the joystick port is written for the second time. Thus solves the mystery delay problem in 0.2! Version 0.5 Upgraded the driver to the 0.99.9 kernel, added @@ -155,7 +155,7 @@ Better ioctl names. Kept binary compatibility. Removed 'save_busy'. Just set busy to 1. Version 0.9.0 Based on 0.7.3 - New read function that allows two axes have same value + New read function that allows two axes to have the same value New joystick calibration code Real support for 3-axis joysticks CPU speed independent timeouts @@ -180,7 +180,7 @@ Version 1.0.2 Works, many bugs fixed, more yet to come Version 1.0.3 Tail cutting logic changes & fixes Fix in js_do_bh - no more zero values for axes - Lost event changest & fixes + Lost event changes & fixes Version 1.0.4 Kernel command line & module configuration support Better cli()/sti() handling Linux 2.1.25 select => poll changes diff -u --recursive --new-file v2.1.98/linux/Documentation/kmod.txt linux/Documentation/kmod.txt --- v2.1.98/linux/Documentation/kmod.txt Tue Mar 17 22:18:13 1998 +++ linux/Documentation/kmod.txt Sat Apr 25 22:13:39 1998 @@ -4,19 +4,19 @@ Kmod is a simple replacement for kerneld. It consists of a request_module() replacement and a kernel thread called kmod. When the kernel requests a module, the kmod wakes up and execve()s modprobe, -passing it the name that was requested. 
After a configurable period of -time, kmod will have delete_module() remove any unused modules. +passing it the name that was requested. -Kmod is configurable through two entries in /proc/sys/kernel. You can -set the path of modprobe (where the kernel looks for it) by doing: +If you have the /proc filesystem mounted, you can set the path of +modprobe (where the kernel looks for it) by doing: echo "/sbin/modprobe" > /proc/sys/kernel/modprobe -To tell kmod when to unload unused modules, do something like: +To periodically unload unused modules, put something like the following +in root's crontab entry: - echo "120" > /proc/sys/kernel/kmod_unload_delay + 0-59/5 * * * * /sbin/rmmod -a -Kmod only loads and unloads modules. Kerneld could do more (although +Kmod only loads modules. Kerneld could do more (although nothing in the standard kernel used its other features). If you require features such as request_route, we suggest that you take a similar approach. A simple request_route function could be called, diff -u --recursive --new-file v2.1.98/linux/Documentation/locks.txt linux/Documentation/locks.txt --- v2.1.98/linux/Documentation/locks.txt Tue May 13 22:41:00 1997 +++ linux/Documentation/locks.txt Tue Apr 28 14:22:04 1998 @@ -67,7 +67,7 @@ Until an updated version of mount(8) becomes available you may have to apply this patch to the mount sources (based on the version distributed with Rick -Faiths util-linux-2.5 package): +Faith's util-linux-2.5 package): *** mount.c.orig Sat Jun 8 09:14:31 1996 --- mount.c Sat Jun 8 09:13:02 1996 diff -u --recursive --new-file v2.1.98/linux/Documentation/m68k/framebuffer.txt linux/Documentation/m68k/framebuffer.txt --- v2.1.98/linux/Documentation/m68k/framebuffer.txt Tue Feb 17 13:12:43 1998 +++ linux/Documentation/m68k/framebuffer.txt Tue Apr 28 14:22:04 1998 @@ -86,14 +86,15 @@ - You can request and change variable information about the hardware, like visible and virtual geometry, depth, color map format, timing, and so on. 
- If you try to change that informations, the driver maybe will round up some + If you try to change that information, the driver maybe will round up some values to meet the hardware's capabilities (or return EINVAL if that isn't possible). - You can get and set parts of the color map. Communication is done with 16 - bit per color part (red, green, blue, transparency) to support all existing - hardware. The driver does all the computations needed to bring it into the - hardware (round it down to less bits, maybe throw away transparency). + bits per color part (red, green, blue, transparency) to support all + existing hardware. The driver does all the computations needed to apply + it to the hardware (round it down to less bits, maybe throw away + transparency). All this hardware abstraction makes the implementation of application programs easier and more portable. E.g. the X server works completely on /dev/fb* and @@ -113,8 +114,8 @@ 3. Frame Buffer Resolution Maintenance -------------------------------------- -Frame buffer resolutions are maintained using the utility `fbset'. It allows to -change the video mode properties of the current resolution. It's main usage is +Frame buffer resolutions are maintained using the utility `fbset'. It can +change the video mode properties of the current resolution. Its main usage is to change the current video mode, e.g. during boot up in one of your /etc/rc.* or /etc/init.d/* files. diff -u --recursive --new-file v2.1.98/linux/Documentation/m68k/kernel-options.txt linux/Documentation/m68k/kernel-options.txt --- v2.1.98/linux/Documentation/m68k/kernel-options.txt Tue Mar 10 10:03:30 1998 +++ linux/Documentation/m68k/kernel-options.txt Tue Apr 28 14:22:04 1998 @@ -87,7 +87,7 @@ /dev/adc: -> 0x1c20 (third ACSI device) /dev/add: -> 0x1c30 (forth ACSI device) -The last for names are available only if the kernel has been compiled +The last four names are available only if the kernel has been compiled with Atari and ACSI support. 
The name must be followed by a decimal number, that stands for the @@ -114,8 +114,8 @@ to /dev/fd0D720 as an abbreviation for floppy driver #0 in DD format, you cannot use this name for specifying the root device, because the kernel cannot see this symlink before mounting the root FS and it -isn't in the table above. If you would use it, the root device weren't -set at all, without error message. Another example: You cannot use a +isn't in the table above. If you use it, the root device will not be +set at all, without an error message. Another example: You cannot use a partition on e.g. the sixth SCSI disk as the root filesystem, if you want to specify it by name. This is, because only the devices up to /dev/sde are in the table above, but not /dev/sdf. Although, you can @@ -561,7 +561,7 @@ Syntax: ataflop=[,[,[,]]] The drive type may be 0, 1, or 2, for DD, HD, and ED, resp. This - setting affects how much buffers are reserved and which formats are + setting affects how many buffers are reserved and which formats are probed (see also below). The default is 1 (HD). Only one drive type can be selected. If you have two disk drives, select the "better" type. @@ -586,12 +586,12 @@ Below, defaults are noted as n/m, where the first value refers to TT-SCSI and the latter to Falcon-SCSI. If an illegal value is given for one parameter, an error message is printed and that one setting is -ignored (other aren't affected). +ignored (others aren't affected). : - This is the maximum number of SCSI commands queued internal to the + This is the maximum number of SCSI commands queued internally to the Atari SCSI driver. A value of 1 effectively turns off the driver - internal multitasking (if it makes problems). Legal values are >= + internal multitasking (if it causes problems). Legal values are >= 1. can be as high as you like, but values greater than times the number of SCSI targets (LUNs) you have don't make sense. Default: 16/8. 
@@ -632,7 +632,7 @@ 0 means turn off tagged queuing support, all other values > 0 mean use tagged queuing for targets that support it. Default: currently off, but this may change when tagged queuing handling has been - proofed to be reliable. + proved to be reliable. Tagged queuing means that more than one command can be issued to one LUN, and the SCSI device itself orders the requests so they @@ -689,7 +689,7 @@ ST-RAM, even if it's small enough compared to the rest of memory. If ST-RAM swapping is enabled, the kernel usually uses all free -ST-RAM as swap "device". (If the kernel resides in ST-RAM, the region +ST-RAM as swap "device". If the kernel resides in ST-RAM, the region allocated by it is obviously never used for swapping :-) You can also limit this amount by specifying the second parameter, , if you want to use parts of ST-RAM as normal system memory. is @@ -852,8 +852,8 @@ x = clock input in MHz for WD33c93 chip. Normal values would be from 8 through 20. The default value depends on your hostadapter(s), -default for the A3000 internal controller is 14, for the A2091 its 8 -and for the GVP hostadapters its either 8 or 14, depending on the +default for the A3000 internal controller is 14, for the A2091 it's 8 +and for the GVP hostadapters it's either 8 or 14, depending on the hostadapter and the SCSI-clock jumper present on some GVP hostadapters. diff -u --recursive --new-file v2.1.98/linux/Documentation/mandatory.txt linux/Documentation/mandatory.txt --- v2.1.98/linux/Documentation/mandatory.txt Sat Sep 21 23:41:32 1996 +++ linux/Documentation/mandatory.txt Tue Apr 28 14:22:04 1998 @@ -19,7 +19,7 @@ transfer agent must guard against updating the mailbox at the same time, and prevent reading the mailbox while it is being updated. -In a perfect world all process would use and honour a cooperative, or +In a perfect world all processes would use and honour a cooperative, or "advisory" locking scheme. 
However, the world isn't perfect, and there's a lot of poorly written code out there. @@ -47,8 +47,8 @@ 2. Marking a file for mandatory locking --------------------------------------- -A file is marked as a candidate for mandatory by setting the group-id bit in -its file mode but removing the group-execute bit. This is an otherwise +A file is marked as a candidate for mandatory locking by setting the group-id +bit in its file mode but removing the group-execute bit. This is an otherwise meaningless combination, and was chosen by the System V implementors so as not to break existing user programs. @@ -103,7 +103,7 @@ 2. If a process has locked a region of a file with a mandatory read lock, then other processes are permitted to read from that region. If any of these processes attempts to write to the region it will block until the lock is - released, unless the process has opened the file opened with the O_NONBLOCK + released, unless the process has opened the file with the O_NONBLOCK flag in which case the system call will return immediately with the error status EAGAIN. @@ -145,7 +145,7 @@ 6. Warning! ----------- -Not even root can override a mandatory lock, so runaway process can wreak +Not even root can override a mandatory lock, so runaway processes can wreak havoc if they lock crucial files. The way around it is to change the file permissions (remove the setgid bit) before trying to read or write to it. Of course, that might be a bit tricky if the system is hung :-( diff -u --recursive --new-file v2.1.98/linux/Documentation/mca.txt linux/Documentation/mca.txt --- v2.1.98/linux/Documentation/mca.txt Thu Dec 12 06:51:07 1996 +++ linux/Documentation/mca.txt Tue Apr 28 14:22:04 1998 @@ -57,7 +57,7 @@ and mca_write_pos() are also available for (safer) direct POS access, but their use is _highly_ discouraged. mca_write_pos() is particularly dangerous, as it is possible for adapters to be put in inconsistent -states (i.e. 
sharing IO address, etc) and may result in crashes, toasted +states (e.g. sharing IO address, etc) and may result in crashes, toasted hardware, and operator injury. User level drivers (such as the AGX X server) can use /proc/mca to find @@ -124,7 +124,7 @@ } Some of the standard MCA information will already be printed, so don't -bother repeating it. Don't try putting in more that 3K of information. +bother repeating it. Don't try putting in more than 3K of information. Enable this function with: mca_set_adapter_procfn( slot, dev_getinfo, dev ); @@ -132,8 +132,8 @@ Disable it with: mca_set_adapter_procfn( slot, NULL, NULL ); -It is also recommended that, even if you don't write a proc function, to -set the name of the adapter (i.e. "PS/2 ESDI Controller") via +It is also recommended, even if you don't write a proc function, to +set the name of the adapter (e.g. "PS/2 ESDI Controller") via mca_set_adapter_name( int slot, char* name ). Up to 30 characters are used. diff -u --recursive --new-file v2.1.98/linux/Documentation/mtrr.txt linux/Documentation/mtrr.txt --- v2.1.98/linux/Documentation/mtrr.txt Wed Dec 31 16:00:00 1969 +++ linux/Documentation/mtrr.txt Tue Apr 28 22:41:33 1998 @@ -0,0 +1,225 @@ +MTRR (Memory Type Range Register) control +17 Dec 1997 +Richard Gooch + + + On Intel Pentium Pro systems the Memory Type Range Registers (MTRRs) + may be used to control processor access to memory ranges. This is + most useful when you have a video (VGA) card on the PCI + bus. Enabling write-combining allows PCI write transfers to be + combined into a larger transfer before bursting over the PCI + bus. This can increase performance of image write operations 2.5 + times or more. + + The CONFIG_MTRR option creates a /proc/mtrr file which may be used + to manipulate your MTRRs. Typically the X server should use + this. This should have a reasonably generic interface so that + similar control registers on other processors can be easily + supported. 
+ + +There are two interfaces to /proc/mtrr: one is an ASCII interface +which allows you to read and write. The other is an ioctl() +interface. The ASCII interface is meant for administration. The +ioctl() interface is meant for C programmes (i.e. the X server). The +interfaces are described below, with sample commands and C code. + +=============================================================================== +Reading MTRRs from the shell: + +% cat /proc/mtrr +reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1 +reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1 +reg05: base=0x80000000 (2048MB), size= 4MB: write-combining, count=1 +=============================================================================== +Creating MTRRs from the shell: +% echo "base=0x80000000 size=0x400000 type=write-combining" >! /proc/mtrr +=============================================================================== +Removing MTRRs from the shell: +% echo "disable=5" >! /proc/mtrr +=============================================================================== +Reading MTRRs from a C programme using ioctl()'s: + +/* mtrr-show.c + + Source file for mtrr-show (example programme to show MTRRs using ioctl()'s) + + Copyright (C) 1997 Richard Gooch + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ + Richard Gooch may be reached by email at rgooch@atnf.csiro.au + The postal address is: + Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. +*/ + +/* + This programme will use an ioctl() on /proc/mtrr to show the current MTRR + settings. This is an alternative to reading /proc/mtrr. + + + Written by Richard Gooch 17-DEC-1997 + + Last updated by Richard Gooch 17-DEC-1997 + + +*/ +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <errno.h> +#define MTRR_NEED_STRINGS +#include <asm/mtrr.h> + +#define TRUE 1 +#define FALSE 0 +#define ERRSTRING strerror (errno) + + +int main () +{ + int fd; + struct mtrr_gentry gentry; + + if ( ( fd = open ("/proc/mtrr", O_RDONLY, 0) ) == -1 ) + { + if (errno == ENOENT) + { + fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n", + stderr); + exit (1); + } + fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING); + exit (2); + } + for (gentry.regnum = 0; ioctl (fd, MTRRIOC_GET_ENTRY, &gentry) == 0; + ++gentry.regnum) + { + if (gentry.size < 1) + { + fprintf (stderr, "Register: %u disabled\n", gentry.regnum); + continue; + } + fprintf (stderr, "Register: %u base: 0x%lx size: 0x%lx type: %s\n", + gentry.regnum, gentry.base, gentry.size, + mtrr_strings[gentry.type]); + } + if (errno == EINVAL) exit (0); + fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING); + exit (3); +} /* End Function main */ +=============================================================================== +Creating MTRRs from a C programme using ioctl()'s: + +/* mtrr-add.c + + Source file for mtrr-add (example programme to add an MTRR using ioctl()) + + Copyright (C) 1997 Richard Gooch + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Richard Gooch may be reached by email at rgooch@atnf.csiro.au + The postal address is: + Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. +*/ + +/* + This programme will use an ioctl() on /proc/mtrr to add an entry. The first + available mtrr is used. This is an alternative to writing /proc/mtrr. + + + Written by Richard Gooch 17-DEC-1997 + + Last updated by Richard Gooch 17-DEC-1997 + + +*/ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <errno.h> +#define MTRR_NEED_STRINGS +#include <asm/mtrr.h> + +#define TRUE 1 +#define FALSE 0 +#define ERRSTRING strerror (errno) + + +int main (int argc, char **argv) +{ + int fd; + struct mtrr_sentry sentry; + + if (argc != 4) + { + fprintf (stderr, "Usage:\tmtrr-add base size type\n"); + exit (1); + } + sentry.base = strtoul (argv[1], NULL, 0); + sentry.size = strtoul (argv[2], NULL, 0); + for (sentry.type = 0; sentry.type < MTRR_NUM_TYPES; ++sentry.type) + { + if (strcmp (argv[3], mtrr_strings[sentry.type]) == 0) break; + } + if (sentry.type >= MTRR_NUM_TYPES) + { + fprintf (stderr, "Illegal type: \"%s\"\n", argv[3]); + exit (2); + } + if ( ( fd = open ("/proc/mtrr", O_WRONLY, 0) ) == -1 ) + { + if (errno == ENOENT) + { + fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n", + stderr); + exit (3); + } + fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING); + exit (4); + } + if (ioctl (fd, MTRRIOC_ADD_ENTRY, &sentry) == -1) + { + fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING); + exit (5); + } + fprintf 
(stderr, "Sleeping for 5 seconds so you can see the new entry\n"); + sleep (5); + close (fd); + fputs ("I've just closed /proc/mtrr so now the new entry should be gone\n", + stderr); +} /* End Function main */ +=============================================================================== diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/00-INDEX linux/Documentation/networking/00-INDEX --- v2.1.98/linux/Documentation/networking/00-INDEX Sun Feb 2 05:18:29 1997 +++ linux/Documentation/networking/00-INDEX Tue Apr 28 14:22:04 1998 @@ -2,8 +2,14 @@ - this file 3c505.txt - information on the 3Com EtherLink Plus (3c505) driver. +6pack.txt + - info on the 6pack protocol, an alternative to KISS for AX.25 Configurable - info on some of the configurable network parameters +DLINK.txt + - info on the D-Link DE-600/DE-620 parallel port pocket adapters +PLIP.txt + - PLIP: The Parallel Line Internet Protocol device driver alias.txt - info on using alias network devices arcnet-hardware.txt @@ -12,18 +18,58 @@ - info on the using the arcnet driver itself. ax25.txt - info on using AX.25 and NET/ROM code for Linux +baycom.txt + - info on the driver for Baycom style amateur radio modems +cops.txt + - info on the COPS LocalTalk Linux driver +cs89x0.txt + - the Crystal LAN (CS8900/20-based) Ethernet ISA adapter driver +de4x5.txt + - the Digital EtherWORKS DE4?? and DE5?? PCI Ethernet driver +depca.txt + - the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver +dgrs.txt + - the Digi International RightSwitch SE-X Ethernet driver +eql.txt + - serial IP load balancing +ethertap.txt + - the Ethertap user space packet reception and transmission driver +ewrk3.txt + - the Digital EtherWORKS 3 DE203/4/5 Ethernet driver +filter.txt + - Linux Socket Filtering framerelay.txt - info on using Frame Relay/Data Link Connection Identifier (DLCI). +ip-sysctl.txt + - /proc/sys/net/ipv4/* variables +ip_dynaddr.txt + - IP dynamic address hack e.g. 
for auto-dialup links +ipddp.txt + - AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation lapb-module.txt - - programming information on the LAPB module. + - programming information of the LAPB module. +ltpc.txt + - the Apple or Farallon LocalTalk PC card driver +multicast.txt + - Behaviour of cards under Multicast ncsa-telnet - notes on how NCSA telnet (DOS) breaks with MTU discovery enabled. net-modules.txt - info and "insmod" parameters for all network driver modules. +policy-routing.txt + - IP policy-based routing ppp.txt - info on what software you should use to run PPP. +pt.txt + - the Gracilis Packetwin AX.25 device driver +routing.txt + - the new routing mechanism shaper.txt - info on the module that can shape/limit transmitted traffic. +smc9.txt + - the driver for SMC's 9000 series of Ethernet cards +soundmodem.txt + - Linux driver for soundcards as AX.25 modems tcp.txt - short blurb on how TCP output takes place. tulip.txt @@ -32,10 +78,13 @@ - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) e'net cards. wan-router.txt - Wan router documentation +wanpipe.txt + - WANPIPE(tm) Multiprotocol WAN Driver for Linux WAN Router +wavelan.txt + - AT&T GIS (nee NCR) WaveLAN card: An Ethernet-like radio transceiver x25.txt - general info on X.25 development. x25-iface.txt - description of the X.25 Packet Layer to LAPB device interface. z8530drv.txt - info about Linux driver for Z8530 based HDLC cards for AX.25 - diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/6pack.txt linux/Documentation/networking/6pack.txt --- v2.1.98/linux/Documentation/networking/6pack.txt Wed Apr 8 19:36:24 1998 +++ linux/Documentation/networking/6pack.txt Tue Apr 28 14:22:04 1998 @@ -9,20 +9,20 @@ 1. What is 6pack, and what are the advantages to KISS? -6pack is a transmission protocol for the data exchange between the PC and +6pack is a transmission protocol for data exchange between the PC and the TNC over a serial line. 
It can be used as an alternative to KISS. 6pack has two major advantages: -- The PC is given the full control over the radio +- The PC is given full control over the radio channel. Special control data is exchanged between the PC and the TNC so - that the PC knows at any time if the TNC is receiving data, if an TNC + that the PC knows at any time if the TNC is receiving data, if a TNC buffer underrun or overrun has occured, if the PTT is set and so on. This control data is processed at a higher priority than normal data, so a data stream can be interrupted at any time to issue an - important event. This helps to improve the channel access and timing algorithms - as everything is computed in the PC. It would even be possible to experiment with - something completely different than the known CSMA and DAMA channel access - methods. + important event. This helps to improve the channel access and timing + algorithms as everything is computed in the PC. It would even be possible + to experiment with something completely different from the known CSMA and + DAMA channel access methods. This kind of real-time control is especially important to supply several TNCs that are connected between each other and the PC by a daisy chain (however, this feature is not supported yet by the Linux 6pack driver). @@ -55,15 +55,15 @@ of a newly bought TNC does not contain 6pack, so you will have to program an EPROM yourself. The image file for 6pack EPROMs should be available on any packet radio box where PC/FlexNet can be found. The name of -the file is 6pack.bin. This file is copyrighted and maintainend by the FlexNet +the file is 6pack.bin. This file is copyrighted and maintained by the FlexNet team. It can be used under the terms of the license that comes along with PC/FlexNet. Please do not ask me about the internals of this file as I don't know anything about it. I used a textual description of the 6pack protocol to program the Linux driver. 
TNCs contain a 64kByte EPROM, the lower half of which is used for -TheFirmware/KISS. The upper half is either empty or is sometimes -programmed with a software called TAPR. In the latter case, the TNC +the firmware/KISS. The upper half is either empty or is sometimes +programmed with software called TAPR. In the latter case, the TNC is supplied with a DIP switch so you can easily change between the two systems. When programming a new EPROM, one of the systems is replaced by 6pack. It is useful to replace TAPR, as this software is rarely used @@ -76,7 +76,7 @@ 5. Building and installing the 6pack driver -The driver has been tested with kernel version 2.1.90. Using with older +The driver has been tested with kernel version 2.1.90. Use with older kernels may lead to a compilation error because the interface to a kernel function has been changed in the 2.1.8x kernels. @@ -93,7 +93,7 @@ To use the driver, the kissattach program delivered with the AX.25 utilities has to be modified. -- Do a cd to the directory that keeps the kissattach sources. Edit the +- Do a cd to the directory that holds the kissattach sources. Edit the kissattach.c file. At the top, insert the following lines: #ifndef N_6PACK @@ -110,8 +110,8 @@ Installing the driver: -- Do an insmod 6pack. Look at your - /var/log/messages file to check if the module has printed its initialization message. +- Do an insmod 6pack. Look at your /var/log/messages file to check if the + module has printed its initialization message. - Do a spattach as you would launch kissattach when starting a KISS port. Check if the kernel prints the message '6pack: TNC found'. @@ -130,7 +130,7 @@ different way than they are when the TNC is used with PC/FlexNet. When using FlexNet, the connect LED is on if there is a connection; the status LED is on if there is data in the buffer of the PC's AX.25 engine that has to be -transmitted. Under LinuX, the 6pack layer is beyond the AX.25 layer, +transmitted. 
Under Linux, the 6pack layer is beyond the AX.25 layer, so the 6pack driver doesn't know anything about connects or data that has not yet been transmitted. Therefore the LEDs are controlled as they are in KISS mode: The connect LED is turned on if data is transferred @@ -143,7 +143,7 @@ operating with data rates on the radio channel of 9600 Baud or higher, the driver may, on certain systems, sometimes print the message '6pack: bad checksum', which is due to data loss if the other station sends two -or more subsequent packets. I have been told that this is due tu a problem +or more subsequent packets. I have been told that this is due to a problem with the serial driver of 2.0.3x kernels. I don't know yet if the problem still exists with 2.1.x kernels, as I have heard that the serial driver code has been changed with 2.1.x. diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/DLINK.txt linux/Documentation/networking/DLINK.txt --- v2.1.98/linux/Documentation/networking/DLINK.txt Sat Nov 29 10:33:18 1997 +++ linux/Documentation/networking/DLINK.txt Tue Apr 28 14:22:04 1998 @@ -52,7 +52,7 @@ 3. FILES IN THIS RELEASE. README.DLINK This file. - de600.c The Source (,may it be with You :-) for the DE-600 + de600.c The Source (may it be with You :-) for the DE-600 de620.c ditto for the DE-620 de620.h Macros for de620.c @@ -78,7 +78,7 @@ modify "linux/drivers/net/CONFIG" accordingly, or adjust the parameters in the "tuning" section in the sources. - If you are going to use the drivers a loadable modules, do _not_ + If you are going to use the drivers as loadable modules, do _not_ enable them while doing "make config", but instead make sure that the drivers are included in "linux/drivers/net/MODULES". 
diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/PLIP.txt linux/Documentation/networking/PLIP.txt --- v2.1.98/linux/Documentation/networking/PLIP.txt Sat Nov 29 10:33:18 1997 +++ linux/Documentation/networking/PLIP.txt Tue Apr 28 14:22:04 1998 @@ -29,11 +29,11 @@ It's cheap, it's available everywhere, and it's easy. The PLIP cable is all that's needed to connect two Linux boxes, and it -can be build for very few bucks. +can be built for very few bucks. -Connecting two Linux boxes takes only a seconds decision and a few -minutes work, no need to search for a [supported] netcard. This might -even be especially important in the case of notebooks, where netcard +Connecting two Linux boxes takes only a second's decision and a few +minutes' work, no need to search for a [supported] netcard. This might +even be especially important in the case of notebooks, where netcards are not easily available. Not requiring a netcard also means that apart from connecting the @@ -45,7 +45,7 @@ Doesn't work over a modem, like SLIP and PPP. Limited range, 15 m. Can only be used to connect three (?) Linux boxes. Doesn't connect to -an exiting ethernet. Isn't standard (not even de facto standard, like +an existing ethernet. Isn't standard (not even de facto standard, like SLIP). Performance @@ -150,7 +150,8 @@ To start a transfer the transmitting machine outputs a nibble 0x08. The raises the ACK line, triggering an interrupt in the receiving -machine. The receiving machine disables +machine. The receiving machine disables interrupts and raises its own ACK +line. Restated: diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/alias.txt linux/Documentation/networking/alias.txt --- v2.1.98/linux/Documentation/networking/alias.txt Tue Mar 10 10:03:30 1998 +++ linux/Documentation/networking/alias.txt Tue Apr 28 14:22:04 1998 @@ -11,7 +11,7 @@ 200.1.1.1 alias for eth0 ... # ifconfig eth0:0 200.1.1.1 etc,etc.... 
- ~~ -> request alias #0 creation (if it not exists) for eth0 + ~~ -> request alias #0 creation (if it does not yet exist) for eth0 and routing stuff also ... # route add -host 200.1.1.1 dev eth0:0 (if same IP network as main device) @@ -28,7 +28,7 @@ Alias (re-)configuring - Aliases are no real devices, but should be able to configure and + Aliases are not real devices, but programs should be able to configure and refer to them as usual (ifconfig, route, etc). Relationship with main device diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/baycom.txt linux/Documentation/networking/baycom.txt --- v2.1.98/linux/Documentation/networking/baycom.txt Tue Aug 5 09:49:50 1997 +++ linux/Documentation/networking/baycom.txt Tue Apr 28 14:22:04 1998 @@ -43,7 +43,7 @@ simple. Once installed, four interfaces named bc[0-3] are available. sethdlc from the ax25 utilities may be used to set driver states etc. Users of userland AX.25 stacks may use the net2kiss utility (also available -in the ax25 utilities package) to converts packets of a network interface +in the ax25 utilities package) to convert packets of a network interface to a KISS stream on a pseudo tty. There's also a patch available from me for WAMPES which allows attaching a kernel network interface directly. @@ -72,7 +72,7 @@ the software DCD algorithm (see below). The channel access parameters can be set with sethdlc -a or kissparms. -Note that both utilities interpret the values slightly different. +Note that both utilities interpret the values slightly differently. Hardware DCD versus Software DCD @@ -93,7 +93,7 @@ feeds the DCD input of the PAR96 modem, the use of the hardware DCD circuitry is recommended. -picpar: the picpar modem features a builtin DCD hardware, which is highly +picpar: the picpar modem features builtin DCD hardware, which is highly recommended. 
diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/cops.txt linux/Documentation/networking/cops.txt --- v2.1.98/linux/Documentation/networking/cops.txt Sat Nov 29 10:33:18 1997 +++ linux/Documentation/networking/cops.txt Tue Apr 28 14:22:04 1998 @@ -1,12 +1,12 @@ Text File for the COPS LocalTalk Linux driver (cops.c). By Jay Schulist -This driver has teo modes and they are: Dayna mode and Tangent mode. +This driver has two modes and they are: Dayna mode and Tangent mode. Each mode corresponds with the type of card. It has been found that there are 2 main types of cards and all other cards are the same and just have different names or only have minor differences such as more IO ports. As this driver is tested it will -become more clear on exactly what cards are supported. +become more clear exactly what cards are supported. Right now these cards are known to work with the COPS driver. The LT-200 cards work in a somewhat more limited capacity than the @@ -20,8 +20,8 @@ Other cards possibly supported mode unkown though: Dayna DL2000 (Full length) -The COPS driver defaults to using Dayna mode. To change the drivers -mode if you build a driver with a dual support use board_type=1 or +The COPS driver defaults to using Dayna mode. To change the driver's +mode if you built a driver with dual support use board_type=1 or board_type=2 for Dayna or Tangent with insmod. ** Operation/loading of the driver. @@ -52,12 +52,12 @@ dummy -seed -phase 2 -net 2000 -addr 2000.10 -zone "1033" lt0 -seed -phase 1 -net 1000 -addr 1000.50 -zone "1033" -* For multiple cards, Ethernet and Localtalk. +* For multiple cards, Ethernet and LocalTalk. eth0 -seed -phase 2 -net 3000 -addr 3000.20 -zone "1033" lt0 -seed -phase 1 -net 1000 -addr 1000.50 -zone "1033" * For multiple LocalTalk cards, and an Ethernet card. -* Order seems to matters here, Ethernet last. +* Order seems to matter here, Ethernet last. 
lt0 -seed -phase 1 -net 1000 -addr 1000.10 -zone "LocalTalk1" lt1 -seed -phase 1 -net 2000 -addr 2000.20 -zone "LocalTalk2" eth0 -seed -phase 2 -net 3000 -addr 3000.30 -zone "EtherTalk" diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/cs89x0.txt linux/Documentation/networking/cs89x0.txt --- v2.1.98/linux/Documentation/networking/cs89x0.txt Sun Feb 2 05:18:29 1997 +++ linux/Documentation/networking/cs89x0.txt Tue Apr 28 14:22:04 1998 @@ -40,22 +40,6 @@ 6.3.2 Crystal's Bulletin Board Service -8.3 OBTAINING THE LATEST DRIVER VERSION - -You can obtain the latest CS89XX drivers and support software from Crystal's -BBS or Web site. - - -8.3.1 CRYSTAL'S WEB SITE - -Crystal Semiconductor maintains a web page at http://www.crystal.com with the -the latest drivers and technical publications. - - -8.3.2 CRYSTAL'S BULLETIN BOARD SERVICE - - - 1.0 CRYSTAL LAN CS8900/CS8920 ETHERNET ADAPTERS =============================================================================== @@ -172,7 +156,7 @@ 10BASE-T (10BASE-T only adapter) You should only change the default configuration settings if conflicts with -another adapter exists. To change the adapter's configuration, run the +another adapter exist. To change the adapter's configuration, run the CS8900/20 Setup Utility. @@ -388,7 +372,7 @@ 5.1 KNOWN DEFECTS and LIMITATIONS Refer to the RELEASE.TXT file distributed as part of this archive for a list of -know defects, driver limitations, and work arounds. +known defects, driver limitations, and work arounds. 5.2 TESTING THE ADAPTER diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/de4x5.txt linux/Documentation/networking/de4x5.txt --- v2.1.98/linux/Documentation/networking/de4x5.txt Sun Dec 21 22:37:32 1997 +++ linux/Documentation/networking/de4x5.txt Tue Apr 28 14:22:04 1998 @@ -82,7 +82,7 @@ To unload a module, turn off the associated interface(s) 'ifconfig eth?? down' then 'rmmod de4x5'. 
- Automedia detection is included so that in principal you can disconnect + Automedia detection is included so that in principle you can disconnect from, e.g. TP, reconnect to BNC and things will still work (after a pause whilst the driver figures out where its media went). My tests using ping showed that it appears to work.... @@ -118,7 +118,7 @@ debt to for the testing and feedback that helped get this feature working. So far we have tested KINGSTON, SMC8432, SMC9332 (with the latest SROM complying with the SROM spec V3: their first was - broken), ZNYX342 and LinkSys. ZYNX314 (dual 21041 MAC) and ZNYX 315 + broken), ZNYX342 and LinkSys. ZNYX314 (dual 21041 MAC) and ZNYX 315 (quad 21041 MAC) cards also appear to work despite their incorrectly wired IRQs. diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/eql.txt linux/Documentation/networking/eql.txt --- v2.1.98/linux/Documentation/networking/eql.txt Sat Nov 29 10:33:18 1997 +++ linux/Documentation/networking/eql.txt Tue Apr 28 14:22:04 1998 @@ -15,7 +15,7 @@ 1. Introduction Which is worse? A huge fee for a 56K leased line or two phone lines? - Its probably the former. If you find yourself craving more bandwidth, + It's probably the former. If you find yourself craving more bandwidth, and have a ISP that is flexible, it is now possible to bind modems together to work as one point-to-point link to increase your bandwidth. All without having to have a special black box on either @@ -64,7 +64,7 @@ -rw-r--r-- guru/ncm 2195 Jan 10 21:48 1995 eql-1.1/eql_enslave.c ______________________________________________________________________ - Unpack a recent kernel (something after 1.1.92) Someplace convenient + Unpack a recent kernel (something after 1.1.92) someplace convenient like say /usr/src/linux-1.1.92.eql. Use symbolic links to point /usr/src/linux to this development directory. 
@@ -250,13 +250,13 @@ One version of the scheduler was able to push 5.3 K/s through the 28800 and 14400 connections, but when the priorities on the links were - very wide apart (57600 vs. 14400) The "faster" modem received all + very wide apart (57600 vs. 14400) the "faster" modem received all traffic and the "slower" modem starved. - 5. Tester's Reports + 5. Testers' Reports - Some people have experimented with the eql device with newer kernels + Some people have experimented with the eql device with newer kernels (than 1.1.75). I have since updated the driver to patch cleanly in newer kernels because of the removal of the old "slave- balancing" driver config option. @@ -469,7 +469,7 @@ I've installed your patch and it works great. I have trialed it over twin SL/IP lines, just over null modems, but I was able to data at over 48Kb/s [ISDN link -Simon]. I managed a - transfer of upto 7.5 Kbyte/s on one go, but averaged around + transfer of up to 7.5 Kbyte/s on one go, but averaged around 6.4 Kbyte/s, which I think is pretty cool. :) diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/ethertap.txt linux/Documentation/networking/ethertap.txt --- v2.1.98/linux/Documentation/networking/ethertap.txt Sat Nov 29 10:33:18 1997 +++ linux/Documentation/networking/ethertap.txt Tue Apr 28 14:22:04 1998 @@ -8,14 +8,14 @@ Ethertap provides packet reception and transmission for user space programs. It can be viewed as a simple ethernet device, -which instead of recieving packets from a network wire, it recieves +which instead of receiving packets from a network wire, it receives them from user space. -Ethertap can be used for anything from Appletalk to IPX to even +Ethertap can be used for anything from AppleTalk to IPX to even building bridging tunnels. It also has many other general purpose uses. -Ethertap also can do ARP for you. Although this is not enabled per +Ethertap also can do ARP for you, although this is not enabled by default. 
SetUp @@ -36,7 +36,7 @@ If you want your Ethertap device to ARP for you would ifconfig the interface like this: ifconfig tap* 192.168.1.1 arp -Remember that the you need to have a corresponding /dev/tap* file +Remember that you need to have a corresponding /dev/tap* file for each tap* device you need to ifconfig. Now Ethertap should be ready to use. @@ -65,7 +65,7 @@ C code for a Simple program using an EtherTap device ==================================================== -This code is just excepts from a real program, so some parts are missing +This code is just excerpts from a real program, so some parts are missing but the important stuff is below. void main (void) diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/filter.txt linux/Documentation/networking/filter.txt --- v2.1.98/linux/Documentation/networking/filter.txt Mon Dec 29 10:22:43 1997 +++ linux/Documentation/networking/filter.txt Tue Apr 28 14:22:04 1998 @@ -18,16 +18,16 @@ LSF is much simpler that BPF. One does not have to worry about devices or anything like that. You simply create your filter code, send it to the kernel via the SO_ATTACH_FILTER ioctl and -if you filter code passes the kernel check on it, you then +if your filter code passes the kernel check on it, you then immediately begin filtering data on that socket. You can also detach filters from your socket via the SO_DETACH_FILTER ioctl. This will probably not be used much since when you close a socket that has a filter on it the -filter is automagicly removed. The other less common case -may be adding a differnt filter on the same socket you had another -filter that is still running, the kernel takes care of removing -the old one and placing your new one in its place, assumming your +filter is automagically removed. 
The other less common case +may be adding a different filter on the same socket where you had another +filter that is still running: the kernel takes care of removing +the old one and placing your new one in its place, assuming your filter has passed the checks, otherwise if it fails the old filter will remain on that socket. diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/ip-sysctl.txt linux/Documentation/networking/ip-sysctl.txt --- v2.1.98/linux/Documentation/networking/ip-sysctl.txt Sat Nov 29 10:33:18 1997 +++ linux/Documentation/networking/ip-sysctl.txt Tue Apr 28 14:22:04 1998 @@ -49,7 +49,7 @@ not to this host as local ones. It is supposed, that BOOTP relay deamon will catch and forward such packets. - default FASLE + default FALSE Not Implemented Yet. @@ -110,7 +110,7 @@ tcp_syn_retries - INTEGER Number of times initial SYNs for an TCP connection attempt will - be retransmitted. Should not be higher that 255. + be retransmitted. Should not be higher than 255. tcp_keepalive_time - INTEGER How often TCP sends out keepalive messages when keepalive is enabled. diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/ipddp.txt linux/Documentation/networking/ipddp.txt --- v2.1.98/linux/Documentation/networking/ipddp.txt Sun Dec 21 17:41:24 1997 +++ linux/Documentation/networking/ipddp.txt Tue Apr 28 14:22:04 1998 @@ -1,42 +1,42 @@ Text file for ipddp.c: - Appletalk-IP Decapsulation and Appletalk-IP Encapsulation + AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation This text file writen by Jay Schulist Introduction ------------ -Appletalk-IP (IPDDP) is the method computers connected to Appletalk -networks can communicate via IP. Appletalk-IP is simply IP datagrams -inside Appletalk packets. +AppleTalk-IP (IPDDP) is the method computers connected to AppleTalk +networks can use to communicate via IP. AppleTalk-IP is simply IP datagrams +inside AppleTalk packets. 
Through this driver you can either allow your Linux box to communicate -IP over an Appletalk network or you can provide IP gatewaying functions -for you Appletalk users. +IP over an AppleTalk network or you can provide IP gatewaying functions +for your AppleTalk users. -You can currently Encapsulate or Decapsulate Appletalk-IP on LocalTalk, +You can currently Encapsulate or Decapsulate AppleTalk-IP on LocalTalk, EtherTalk and PPPTalk. The only limit on the protocol is that of what -the kernel Appletalk layer and drivers are available. +kernel AppleTalk layer and drivers are available. Each mode requires its own user space software. -Compiling Appletalk-IP Decapsulation/Encapsulation +Compiling AppleTalk-IP Decapsulation/Encapsulation ================================================= -Appletalk-IP Decapsulation needs to be compiled into your kernel. You +AppleTalk-IP Decapsulation needs to be compiled into your kernel. You will need to turn on Appletalk-IP driver support. Then you will need to -select ONE of the two options; IP to Appletalk-IP Encapsulation support or -Appletalk-IP to IP Decapsulation support. If you compile the driver -staticly you will only be able to use the driver for the function you have +select ONE of the two options; IP to AppleTalk-IP Encapsulation support or +AppleTalk-IP to IP Decapsulation support. If you compile the driver +statically you will only be able to use the driver for the function you have enabled in the kernel. If you compile the driver as a module you can select what mode you want it to run in via a module loading param. -ipddp_mode=1 for Appletalk-IP Encapsulation and ipddp_mode=2 for -Appletalk-IP to IP Decapsulation. +ipddp_mode=1 for AppleTalk-IP Encapsulation and ipddp_mode=2 for +AppleTalk-IP to IP Decapsulation. 
Basic instructions for user space tools ======================================= -To enable Appletalk-IP Decapsulation/Encapsulation you will need the +To enable AppleTalk-IP Decapsulation/Encapsulation you will need the proper tools. You can get the tools for Decapsulation from http://spacs1.spacs.k12.wi.us/~jschlst/MacGate and for Encapsulation from http://www.maths.unm.edu/~bradford/ltpc.html @@ -46,7 +46,7 @@ Decapsulation - You will need to download a software package called MacGate. In this distribution there will be a tool called MacRoute -which enabled you to add routes to the kernel for your Macs by hand. +which enables you to add routes to the kernel for your Macs by hand. Also the tool MacRegGateWay is included to register the proper IP Gateway and IP addresses for your machine. Included in this distribution is a patch to netatalk-1.4b2+asun2.0a17.2 (available from @@ -55,13 +55,13 @@ for locations with large Mac installations) Encapsulation - You will need to download a software daemon called ipddpd. -This software expects there to be and Appletalk-IP gateway on the network. +This software expects there to be an AppleTalk-IP gateway on the network. You will also need to add the proper routes to route your Linux box's IP traffic out the ipddp interface. Common Uses of ipddp.c ---------------------- -Of course Appletalk-IP Decapsulation and Encapsulation, but specificly +Of course AppleTalk-IP Decapsulation and Encapsulation, but specifically Decapsulation is being used most for connecting LocalTalk networks to IP networks. Although it has been used on EtherTalk networks to allow Macs that are only able to tunnel IP over EtherTalk. @@ -70,9 +70,9 @@ network to use IP. It should work equally well if you are stuck on an EtherTalk only network. -Further Assisatance +Further Assistance ------------------- You can contact me (Jay Schulist ) with any questions reguarding Decapsulation or Encapsulation. Bradford W.
Johnson originally wrote the ipddp.c driver for IP -encapsulation in Appletalk. +encapsulation in AppleTalk. diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/lapb-module.txt linux/Documentation/networking/lapb-module.txt --- v2.1.98/linux/Documentation/networking/lapb-module.txt Thu Jan 2 05:13:24 1997 +++ linux/Documentation/networking/lapb-module.txt Tue Apr 28 14:22:04 1998 @@ -2,7 +2,7 @@ Jonathan Naylor 29.12.96 -The LAPB module will be a seperately compiled module for use by any parts of +The LAPB module will be a separately compiled module for use by any parts of the Linux operating system that require a LAPB service. This document defines the interfaces to, and the services provided by this module. The term module in this context does not imply that the LAPB module is a @@ -73,7 +73,7 @@ and 7 for a standard LAPB link, and between 1 and 127 for an extended LAPB link. -The mode variable is a bit field is used for setting (at present) three values. +The mode variable is a bit field used for setting (at present) three values. The bit fields have the following meanings: Bit Meaning diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/ltpc.txt linux/Documentation/networking/ltpc.txt --- v2.1.98/linux/Documentation/networking/ltpc.txt Sat Nov 29 10:33:18 1997 +++ linux/Documentation/networking/ltpc.txt Tue Apr 28 14:22:04 1998 @@ -1,8 +1,8 @@ This is the ALPHA version of the ltpc driver. In order to use it, you will need at least version 1.3.3 of the -netatalk package, and the Apple or Farallon Localtalk PC card. -There are a number of different Localtalk cards for the PC; this +netatalk package, and the Apple or Farallon LocalTalk PC card. +There are a number of different LocalTalk cards for the PC; this driver applies only to the one with the 65c02 processor chip on it. To include it in the kernel, select the CONFIG_LTPC switch in the @@ -20,7 +20,7 @@ at bootup. 
The appropriate netatalk configuration depends on whether you are -attached to a network that includes appletalk routers or not. If, +attached to a network that includes AppleTalk routers or not. If, like me, you are simply connecting to your home Macintoshes and printers, you need to set up netatalk to "seed". The way I do this is to have the lines @@ -29,14 +29,14 @@ ltalk0 -seed -phase 1 -net 1033 -addr 1033.27 -zone "1033" in my atalkd.conf. What is going on here is that I need to fool -netatalk into thinking that there are two appletalk interfaces +netatalk into thinking that there are two AppleTalk interfaces present -- otherwise it refuses to seed. This is a hack, and a more permanent solution would be to alter the netatalk code. Note that the dummy driver needs to accept multicasts also -- earlier versions of dummy.c may need to be patched. -If you are attached to an extended appletalk network, with routers on +If you are attached to an extended AppleTalk network, with routers on it, then you don't need to fool around with this -- the appropriate line in atalkd.conf is @@ -75,7 +75,7 @@ IP: Many people are interested in this driver in order to use IP -when Localtalk, but no Ethernet, is available. While the code to do +when LocalTalk, but no Ethernet, is available. While the code to do this is not strictly speaking part of this driver, an experimental version is available which seems to work under kernel 2.0.xx. It is not yet functional in the 2.1.xx kernels. diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/multicast.txt linux/Documentation/networking/multicast.txt --- v2.1.98/linux/Documentation/networking/multicast.txt Sat Nov 29 10:33:18 1997 +++ linux/Documentation/networking/multicast.txt Tue Apr 28 14:22:04 1998 @@ -52,6 +52,6 @@ znet YES YES YES Software -PROMISC = This multicasts mode is in fact promiscuous mode. Avoid using +PROMISC = This multicast mode is in fact promiscuous mode. 
Avoid using cards who go PROMISC on any multicast in a multicast kernel. (#) = Hardware multicast support is not used yet. diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/net-modules.txt linux/Documentation/networking/net-modules.txt --- v2.1.98/linux/Documentation/networking/net-modules.txt Thu Jun 26 12:33:36 1997 +++ linux/Documentation/networking/net-modules.txt Tue Apr 28 14:22:04 1998 @@ -2,31 +2,31 @@ Linux network driver modules - Do not mistake this to "README.modules" at the top-level + Do not mistake this for "README.modules" at the top-level directory! That document tells about modules in general, while this one tells only about network device driver modules. This is a potpourri of INSMOD-time(*) configuration options (if such exists) and their default values of various modules - on Linux network drivers collection. + in the Linux network drivers collection. Some modules have also hidden (= non-documented) tunable values. - Choice of not documenting them is based on general belief, that - the less user needs to know, the better. (There are things that - driver developer can use, others should not confuse themselves.) + The choice of not documenting them is based on general belief, that + the less the user needs to know, the better. (There are things that + driver developers can use, others should not confuse themselves.) In many cases it is highly preferred that insmod:ing is done ONLY with defining an explicit address for the card, AND BY NOT USING AUTO-PROBING! - Now most cards have some explicitly defined base address, they + Now most cards have some explicitly defined base address that they are compiled with (to avoid auto-probing, among other things). If that compiled value does not match your actual configuration, - do use "io=0xXXX" -parameter for the insmod, and give there + do use the "io=0xXXX" -parameter for the insmod, and give there a value matching your environment. 
If you are adventurous, you can ask the driver to autoprobe - by using "io=0" parameter, however it is potentially dangerous + by using the "io=0" parameter, however it is a potentially dangerous thing to do in a live system. (If you don't know where the card is located, you can try autoprobing, and after possible crash recovery, insmod with proper IO-address..) diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/policy-routing.txt linux/Documentation/networking/policy-routing.txt --- v2.1.98/linux/Documentation/networking/policy-routing.txt Wed Dec 18 01:45:43 1996 +++ linux/Documentation/networking/policy-routing.txt Tue Apr 28 14:22:04 1998 @@ -127,7 +127,7 @@ It is funny, but pretty useless algorithm. I listed it just to show power of new routing code. -5. All the variaty of combinations...... +5. All the variety of combinations...... GATED diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/pt.txt linux/Documentation/networking/pt.txt --- v2.1.98/linux/Documentation/networking/pt.txt Sat Nov 29 10:33:18 1997 +++ linux/Documentation/networking/pt.txt Tue Apr 28 14:22:04 1998 @@ -49,7 +49,7 @@ /sbin/route add -host 44.136.8.95 dev pt0b /sbin/route add -host 44.255.255.255 dev pt0b -This version of the driver comes under the GNU GPL. If you have one on my +This version of the driver comes under the GNU GPL. If you have one of my previous (non-GPL) versions of the driver, please update to this one. I hope that this all works well for you. I would be pleased to hear how diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/routing.txt linux/Documentation/networking/routing.txt --- v2.1.98/linux/Documentation/networking/routing.txt Mon Apr 6 17:40:59 1998 +++ linux/Documentation/networking/routing.txt Tue Apr 28 14:22:04 1998 @@ -18,18 +18,18 @@ NEWS for user. -- Policy based routing. Routing decisions are made on the base +- Policy based routing. 
Routing decisions are made on the basis not only of destination address, but also source address, TOS and incoming interface. - Complete set of IP level control messages. - Now Linux is the only in the world OS comlying to RFC requirements. + Now Linux is the only OS in the world complying to RFC requirements. Great win 8) - New interface addressing paradigm. Assignment of address ranges to interface, multiple prefixes etc. etc. Do not bother, it is compatible with old one. Moreover: -- You more need not make "route add aaa.bbb.ccc... eth0", - it is made automatically. +- You more need not do "route add aaa.bbb.ccc... eth0", + it is done automatically. - "Abstract" UNIX sockets and security enhancements. It is necessary to use TIRPC and TLI emulation library. diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/shaper.txt linux/Documentation/networking/shaper.txt --- v2.1.98/linux/Documentation/networking/shaper.txt Thu Dec 12 06:51:07 1996 +++ linux/Documentation/networking/shaper.txt Tue Apr 28 14:22:04 1998 @@ -29,8 +29,8 @@ Gotchas: - The shaper shapes transmitted traffic. Its rather impossible to -shape received traffic except at the end (or a router) transmiting it. + The shaper shapes transmitted traffic. It's rather impossible to +shape received traffic except at the end (or a router) transmitting it. Gated/routed/rwhod/mrouted all see the shaper as an additional device and will treat it as such unless patched. Note that for mrouted you can run diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/soundmodem.txt linux/Documentation/networking/soundmodem.txt --- v2.1.98/linux/Documentation/networking/soundmodem.txt Thu Sep 4 13:25:28 1997 +++ linux/Documentation/networking/soundmodem.txt Tue Apr 28 14:22:04 1998 @@ -16,10 +16,10 @@ The Interface of the driver -The driver provides a kernel network drivers named sm[0-3]. sethdlc +The driver provides kernel network drivers named sm[0-3]. 
sethdlc from the ax25 utilities may be used to set driver states etc. Users of userland AX.25 stacks may use the net2kiss utility (also available -in the ax25 utilities package) to converts packets of a network interface +in the ax25 utilities package) to convert packets of a network interface to a KISS stream on a pseudo tty. There's also a patch available from me for WAMPES which allows attaching a kernel network interface directly. diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/wan-router.txt linux/Documentation/networking/wan-router.txt --- v2.1.98/linux/Documentation/networking/wan-router.txt Mon Jan 12 14:46:16 1998 +++ linux/Documentation/networking/wan-router.txt Tue Apr 28 14:22:04 1998 @@ -30,8 +30,8 @@ then the price of a typical PC box. Alternatively, considering robustness and multitasking capabilities of Linux, -an internal router can be build (most routers use some sort of stripped down -Unix-like operating system anyway). With number of relatively inexpensive WAN +an internal router can be built (most routers use some sort of stripped down +Unix-like operating system anyway). With a number of relatively inexpensive WAN interface cards available on the market, a perfectly usable router can be built for less than half a price of an external router. Yet a Linux box acting as a router can still be used for other purposes, such as firewalling, @@ -39,37 +39,37 @@ This kernel module introduces the notion of a WAN Link Driver (WLD) to Linux operating system and provides generic hardware-independent services for such -drivers. Why existing Linux network device interface can not be used for -this purpose? Well, it can. However, there are few key differences between -typical network interface (i.e. ethernet) and WAN link. +drivers. Why can existing Linux network device interface not be used for +this purpose? Well, it can. However, there are a few key differences between +a typical network interface (e.g. ethernet) and a WAN link. 
Many WAN protocols, such as X.25 and frame relay, allow for multiple logical connections (known as `virtual circuits' in X.25 terminology) over a single physical link. Each such virtual circuit may (and almost always does) lead -to diffrent geographical location and, therefore, different network. As a +to a different geographical location and, therefore, different network. As a result, it is the virtual circuit, not the physical link, that represents a route and, therefore, a network interface in Linux terms. To further complicate things, virtual cuircits are usually volatile in nature (excluding so called `permanent' virtual circuits or PVCs). With almost no -time required to set up and tear down virtual circuit, it is highly desirable +time required to set up and tear down a virtual circuit, it is highly desirable to implement on-demand connections in order to minimize network charges. So -unlike typical network driver, the WAN driver must be able to handle multiple -network interfaces and cope with multiple virtual circuits come into existance +unlike a typical network driver, the WAN driver must be able to handle multiple +network interfaces and cope as multiple virtual circuits come into existence and go away dynamically. Last, but not least, WAN configuration is much more complex than that of say ethernet and may well amount to several dozens of parameters. Some of them are "link-wide" while others are virtual circuit-specific. The same holds true for WAN statistics which is by far more extensive and extremely useful -when troubleshooting WAN connections. Extending ifconfig utility to suite +when troubleshooting WAN connections. Extending the ifconfig utility to suit these needs may be possible, but does not seem quite reasonable. Therefore, a WAN configuration utility and corresponding application programmer's interface is needed for this purpose. -Most of these problems are taken care of by this module. 
It's goal is to -provide user with more-or-less standard look and feel for all WAN devices and -assist WAN device driver writer by providing common services, such as: +Most of these problems are taken care of by this module. Its goal is to +provide a user with more-or-less standard look and feel for all WAN devices and +assist a WAN device driver writer by providing common services, such as: o User-level interface via /proc filesystem o Centralized configuration @@ -77,7 +77,7 @@ o Network interface management (dynamic creation/destruction) o Protocol encapsulation/decapsulation -To ba able to use Linux WAN Router you will also need a WAN Tools package +To be able to use the Linux WAN Router you will also need a WAN Tools package available from ftp.sangoma.com/pub/linux/vX.Y.Z/wantools-X.Y.Z.tgz @@ -112,12 +112,12 @@ This product is based on the WANPIPE(tm) Multiprotocol WAN Router developed by Sangoma Technologies Inc. for Linux 1.2.x. Release of Linux 2.0 in summer 1996 commanded adequate changes to the WANPIPE code to take full advantage of -new Linux features. Instead of continuing developing proprietory interface +new Linux features. Instead of continuing developing proprietary interface specific to Sangoma WAN cards, we decided to put all hardware-independent code into a separate module and define two levels of interfaces - one for user- level applications and another for kernel-level WAN drivers. -Many usefull ideas concerning hardware-independent interface implementation +Many useful ideas concerning hardware-independent interface implementation were given by Mike McLagan and his implementation of the Frame Relay router and drivers for Sangoma cards (dlci/sdla).
diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/wanpipe.txt linux/Documentation/networking/wanpipe.txt --- v2.1.98/linux/Documentation/networking/wanpipe.txt Mon Jan 12 14:46:16 1998 +++ linux/Documentation/networking/wanpipe.txt Tue Apr 28 14:22:04 1998 @@ -9,25 +9,26 @@ INTRODUCTION -WANPIPE(tm) is a family of intelligent muliprotocol WAN communication adapters +WANPIPE(tm) is a family of intelligent multiprotocol WAN communication adapters for personal computers (ISA bus) designed to provide PC connectivity to various communication links, such as leased lines and public data networks, at -speeds up to T1/E1 using variety of synchronous communications protocols, +speeds up to T1/E1 using a variety of synchronous communications protocols, including frame relay, PPP, X.25, SDLC, etc. -WANPIPE driver together with Linux WAN Router module allows you to build -relatively inexpensive, yet high-prformance multiprotocol WAN router. For -more information about Linux WAN Router please read file -Documentation/networking/wan-router.txt. You must also obtain WAN Tools -package to be able to use Linux WAN Router and WANPIPE driver. The package +WANPIPE driver together with Linux WAN Router module allows you to build a +relatively inexpensive, yet high-performance multiprotocol WAN router. For +more information about the Linux WAN Router please read the file +Documentation/networking/wan-router.txt. You must also obtain the WAN Tools +package to be able to use the Linux WAN Router and WANPIPE driver. The package is available via the Internet from Sangoma Technologies' anonymous FTP server: ftp.sangoma.com/pub/linux/wantools-X.Y.Z.tgz or ftp.sangoma.com/pub/linux/wanpipe-X.Y.Z.tgz -The name of the package differ only due to naming convention. The functionalityof wantools and wanpipe packages are the same. The latest version of WAN -Drivers is wanpipe-2.0.0. +The names of the packages differ only due to naming convention. 
The +functionality of the wantools and wanpipe packages is the same. The latest +version of the WAN Drivers is wanpipe-2.0.0. For technical questions and/or comments please e-mail to jaspreet@sangoma.com. For general inquiries please contact Sangoma Technologies Inc. by @@ -74,7 +75,7 @@ sdladrv.c SDLA support module source code sdla_fr.c SDLA Frame Relay source code sdla_ppp.c SDLA PPP source code - sdla_x25.c SDLA X25 source code + sdla_x25.c SDLA X.25 source code sdlamain.c SDLA support source code include/linux: @@ -137,7 +138,7 @@ o Added support for synchronous PPP o Added support for S503 adapter o Added API for executing adapter commands - o Fixed a re-entrancy problem in frame relaty driver + o Fixed a re-entrancy problem in frame relay driver o Changed interface between SDLA driver and protocol support modules o Updated frame relay firmware diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/wavelan.txt linux/Documentation/networking/wavelan.txt --- v2.1.98/linux/Documentation/networking/wavelan.txt Sat Nov 29 10:33:18 1997 +++ linux/Documentation/networking/wavelan.txt Tue Apr 28 14:22:04 1998 @@ -1,18 +1,19 @@ Sun Jul 2 01:38:33 EST 1995 -1. At present the driver autoprobes for a WaveLAN card only at I/O address 0x390. - The version of the card that I use (NCR) supports four I/O addresses (selectable - via a pair of DIP switches). If you want the driver to autoprobe a different - subset of the four valid addresses then you will need to edit - .../drivers/net/wavelan.c (near line 714) and change the initialisation of the - `iobase[]' array. Normally, I use a LILO configuration file directive to - obviate the need for autoprobing entirely, a course of action I heartily - recommend. +1. At present the driver autoprobes for a WaveLAN card only at I/O address + 0x390. The version of the card that I use (NCR) supports four I/O addresses + (selectable via a pair of DIP switches). 
If you want the driver to + autoprobe a different subset of the four valid addresses then you will need + to edit .../drivers/net/wavelan.c (near line 714) and change the + initialisation of the `iobase[]' array. Normally, I use a LILO + configuration file directive to obviate the need for autoprobing entirely, + a course of action I heartily recommend. -2. By default, the driver uses the Network ID (NWID) stored in the card's Parameter - Storage Area (PSA). However, the PSA NWID can be overridden by a value passed - explicitly as the third numeric argument to LILO's "ether=" directive, either - at the LILO prompt at boot time or within LILO's configuration file. +2. By default, the driver uses the Network ID (NWID) stored in the card's + Parameter Storage Area (PSA). However, the PSA NWID can be overridden by a + value passed explicitly as the third numeric argument to LILO's "ether=" + directive, either at the LILO prompt at boot time or within LILO's + configuration file. For example, the following line from such a LILO configuration file would auto-configure the IRQ value, set the I/O base to 0x390 and set the NWID to 0x4321, all on a WaveLAN card labelled "eth0": diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/x25-iface.txt linux/Documentation/networking/x25-iface.txt --- v2.1.98/linux/Documentation/networking/x25-iface.txt Thu Jan 2 05:13:24 1997 +++ linux/Documentation/networking/x25-iface.txt Tue Apr 28 14:22:04 1998 @@ -7,8 +7,8 @@ setting of the LAPB mode from within the Packet Layer. The X.25 device driver will be coded normally as per the Linux device driver -standards, most X.25 device drivers will be moderately similar to the -already existing Eethernet device drivers. However unlike those drivers, the +standards. Most X.25 device drivers will be moderately similar to the +already existing Ethernet device drivers. 
However unlike those drivers, the X.25 device driver has a state associated with it, and this information needs to be passed to and from the Packet Layer for proper operation. diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/x25.txt linux/Documentation/networking/x25.txt --- v2.1.98/linux/Documentation/networking/x25.txt Mon Jul 7 08:19:59 1997 +++ linux/Documentation/networking/x25.txt Tue Apr 28 14:22:04 1998 @@ -19,7 +19,7 @@ written which will allow X.25 to be run over an Ethernet (or Token Ring) and conform with the JNT "Pink Book", this will have a different interface to the Packet Layer but there will be no confusion since the class of device -being served by the LLC will be completely seperate from LAPB. The LLC +being served by the LLC will be completely separate from LAPB. The LLC implementation is being done as part of another protocol project (SNA) and by a different author. diff -u --recursive --new-file v2.1.98/linux/Documentation/networking/z8530drv.txt linux/Documentation/networking/z8530drv.txt --- v2.1.98/linux/Documentation/networking/z8530drv.txt Wed Feb 4 11:35:59 1998 +++ linux/Documentation/networking/z8530drv.txt Tue Apr 28 14:22:04 1998 @@ -232,7 +232,7 @@ gencfg 2 0x300 2 4 5 -4 0 7 4915200 0x10 -does the same for the BAYCOM USCC card. I my opinion it is much easier +does the same for the BAYCOM USCC card. In my opinion it is much easier to edit scc_config.h... @@ -318,9 +318,9 @@ ======================= Since the TTY driver (aka KISS TNC emulation) is gone you need -to emulate the old behaviour. The cost using these programs is -that you probably need to compile the kernel AX.25, regardless -if you actually use it or not. First setup your /etc/ax25/axports, +to emulate the old behaviour. The cost of using these programs is +that you probably need to compile the kernel AX.25, regardless of whether +you actually use it or not. 
First setup your /etc/ax25/axports, for example: 9k6 dl0tha-9 9600 255 4 9600 baud port (scc3) @@ -406,7 +406,7 @@ An overrun is abnormal. If lots of these occur, the product of baudrate and number of interfaces is too high for the processing -power of you computer. NoSpace errors are unlikely caused by the +power of your computer. NoSpace errors are unlikely to be caused by the driver or the kernel AX.25. @@ -559,7 +559,7 @@ group: It is possible to build special radio equipment to use more than - one frequency on the same bad, e.g. using several receivers and + one frequency on the same band, e.g. using several receivers and only one transmitter that can be switched between frequencies. Also, you can connect several radios that are active on the same band. In these cases, it is not possible, or not a good idea, to @@ -617,7 +617,7 @@ (i.e. Amstrad) Those systems have a bogus AT bus timing which will lead to delayed answers on interrupts. You can recognize these problems by looking at the output of Sccstat for the suspected -port. See if it shows under- and overruns you own such a system. +port. If it shows under- and overruns you own such a system. Delayed processing of received data: This depends on @@ -634,7 +634,7 @@ - using information from rxecho or kissbridge. -Kernel panics: please read to /linux/README and find out if it +Kernel panics: please read /linux/README and find out if it really occurred within the scc driver. If you cannot solve a problem, send me diff -u --recursive --new-file v2.1.98/linux/Documentation/nfsroot.txt linux/Documentation/nfsroot.txt --- v2.1.98/linux/Documentation/nfsroot.txt Sat Nov 29 10:33:18 1997 +++ linux/Documentation/nfsroot.txt Tue Apr 28 14:22:04 1998 @@ -81,7 +81,7 @@ This parameter tells the kernel how to configure IP addresses of devices and also how to set up the IP routing table. 
It was originally called `nfsaddrs', - but now the boot-time IP configuration works independently on NFS, so it + but now the boot-time IP configuration works independently of NFS, so it was renamed to `ip' and the old name remained as an alias for compatibility reasons. @@ -106,14 +106,14 @@ the address of the server is used which answered the RARP or BOOTP request. - IP address of a gateway if the server in on a different + IP address of a gateway if the server is on a different subnet. If this entry is empty no gateway is used and the server is assumed to be on the local network, unless a value has been received by BOOTP. Netmask for local network interface. If this is empty, the netmask is derived from the client IP address assuming - classful addressing, unless overriden in BOOTP reply. + classful addressing, unless overridden in BOOTP reply. Name of the client. If empty, the client IP address is used in ASCII-notation, or the value received by BOOTP. diff -u --recursive --new-file v2.1.98/linux/Documentation/oops-tracing.txt linux/Documentation/oops-tracing.txt --- v2.1.98/linux/Documentation/oops-tracing.txt Fri Sep 6 09:19:04 1996 +++ linux/Documentation/oops-tracing.txt Tue Apr 28 14:22:04 1998 @@ -41,7 +41,7 @@ disassembly. 
Now, the trick is just then to combine all the data you have: the C -sources (and general knowledge of what it _should_ do, the assembly +sources (and general knowledge of what it _should_ do), the assembly listing and the code disassembly (and additionally the register dump you also get from the "oops" message - that can be useful to see _what_ the corrupted pointers were, and when you have the assembler listing you can diff -u --recursive --new-file v2.1.98/linux/Documentation/paride.txt linux/Documentation/paride.txt --- v2.1.98/linux/Documentation/paride.txt Fri Jan 30 11:28:05 1998 +++ linux/Documentation/paride.txt Tue Apr 28 14:22:04 1998 @@ -146,10 +146,10 @@ If you happen to be using a MicroSolutions backpack device, you will also need to know the unit ID number for each drive. This is usually -the last two digits of the drive's serial number (but read MicroSolution's +the last two digits of the drive's serial number (but read MicroSolutions' documentation about this). -As an example, lets assume that you have a MicroSolutions PD/CD drive +As an example, let's assume that you have a MicroSolutions PD/CD drive with unit ID number 36 connected to the parallel port at 0x378, a SyQuest EZ-135 connected to the chained port on the PD/CD drive and also an Imation Superdisk connected to port 0x278. You could give the following diff -u --recursive --new-file v2.1.98/linux/Documentation/pci.txt linux/Documentation/pci.txt --- v2.1.98/linux/Documentation/pci.txt Thu Apr 23 20:21:27 1998 +++ linux/Documentation/pci.txt Tue Apr 28 14:22:04 1998 @@ -22,7 +22,7 @@ In case you want to do some complex matching, look at pci_devices -- it's a linked list of pci_dev structures for all PCI devices in the system. - All these methods return pointer to a pci_dev structure which is used as a + All these methods return a pointer to a pci_dev structure which is used as a parameter for many other PCI functions. 
The rest of them accept bus and device/function numbers which can be found in pci_dev->bus->number and pci_dev->devfn. Feel free to use all other fields of the pci_dev structure, but @@ -34,8 +34,8 @@ 2. How to access PCI config space ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can use pci_(read|write)_config_(byte|word|dword) to access the config -space of device represented by pci_dev. All these functions return 0 when -successfull or an error code (PCIBIOS_...) which can be translated to text +space of a device represented by pci_dev. All these functions return 0 when +successful or an error code (PCIBIOS_...) which can be translated to a text string by pcibios_strerror. Most drivers expect that accesses to valid PCI devices don't fail. diff -u --recursive --new-file v2.1.98/linux/Documentation/powerpc/00-INDEX linux/Documentation/powerpc/00-INDEX --- v2.1.98/linux/Documentation/powerpc/00-INDEX Mon Jan 12 15:18:12 1998 +++ linux/Documentation/powerpc/00-INDEX Tue Apr 28 14:22:04 1998 @@ -1,5 +1,5 @@ Index of files in Documentation/powerpc. If you think something about -Linux/PPC needs an entry here, needs correction of you've written one +Linux/PPC needs an entry here, needs correction or you've written one please mail me. Cort Dougan (cort@cs.nmt.edu) diff -u --recursive --new-file v2.1.98/linux/Documentation/ramdisk.txt linux/Documentation/ramdisk.txt --- v2.1.98/linux/Documentation/ramdisk.txt Mon May 6 02:26:01 1996 +++ linux/Documentation/ramdisk.txt Tue Apr 28 14:22:04 1998 @@ -31,7 +31,7 @@ Also, the new ramdisk supports up to 16 ramdisks out of the box, and can be reconfigured in rd.c to support up to 255 ramdisks. To use multiple ramdisk support with your system, run 'mknod /dev/ramX b 1 X' and chmod -(to change it's permissions) it to your liking. The default /dev/ram(disk) +(to change its permissions) it to your liking. The default /dev/ram(disk) uses minor #1, so start with ram2 and go from there. 
The old "ramdisk=" has been changed to "ramdisk_size=" @@ -42,7 +42,7 @@ allowing one to squeeze more programs onto an average installation or rescue floppy disk. -Notes: You may have "dev/ram" or "/dev/ramdisk" or both. They are +Notes: You may have "/dev/ram" or "/dev/ramdisk" or both. They are equivalent from the standpoint of this document. Also, the new ramdisk is a config option. When running "make config", make sure you enable ramdisk support for the kernel you intend to use the ramdisk with. @@ -113,7 +113,7 @@ indicates whether a prompt/wait sequence is to be given before trying to read the ramdisk. Since the ramdisk dynamically grows as data is being written into it, a size field is no longer required. Bits 11 -to 13 are not presently used and may as well be zero. These numbers +to 13 are not currently used and may as well be zero. These numbers are no magical secrets, as seen below: ./arch/i386/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF @@ -160,7 +160,7 @@ restriction does not apply. a) Decide on the ramdisk size that you want. Say 2MB for this example. - Create it by writing to the ramdisk device. (This step is not presently + Create it by writing to the ramdisk device. (This step is not currently required, but may be in the future.) It is wise to zero out the area (esp. for disks) so that maximal compression is achieved for the unused blocks of the image that you are about to create. diff -u --recursive --new-file v2.1.98/linux/Documentation/riscom8.txt linux/Documentation/riscom8.txt --- v2.1.98/linux/Documentation/riscom8.txt Tue Mar 10 10:03:30 1998 +++ linux/Documentation/riscom8.txt Tue Apr 28 14:22:04 1998 @@ -13,20 +13,20 @@ as module use insmod options "iobase=0xXXX iobase1=0xXXX iobase2=..." 2) The driver partially supports famous 'setserial' program, you can use almost - any it option, exclude port & irq settings. + any of its options, excluding port & irq settings. 3) There are some misc. 
defines at the beginning of riscom8.c, please read the comments and try to change some of them in case of problems. 4) I consider the current state of the driver as BETA. - If you REALLY think you found the bug, send me e-mail, I hope I'll + If you REALLY think you found a bug, send me e-mail, I hope I'll fix it. For any other problems please ask support@sdlcomm.com. 5) SDL Communications WWW page is http://www.sdlcomm.com. 6) You can use the script at the end of this file to create RISCom/8 devices. -7) Minors number for 1-st board are 0-7, for second 8-15, etc. +7) Minor numbers for first board are 0-7, for second 8-15, etc. 22 Apr 1996. diff -u --recursive --new-file v2.1.98/linux/Documentation/scsi.txt linux/Documentation/scsi.txt --- v2.1.98/linux/Documentation/scsi.txt Thu Sep 21 22:04:12 1995 +++ linux/Documentation/scsi.txt Tue Apr 28 14:22:04 1998 @@ -5,7 +5,7 @@ The scsi-core contains the core of scsi support. Without it you can do nothing with any of the other scsi drivers. The scsi core -support can be a module (scsi_mod.o), or it can be build into the kernel. +support can be a module (scsi_mod.o), or it can be built into the kernel. If the core is a module, it must be the first scsi module loaded, and if you unload the modules, it will have to be the last one unloaded. diff -u --recursive --new-file v2.1.98/linux/Documentation/smp linux/Documentation/smp --- v2.1.98/linux/Documentation/smp Sun Feb 2 05:18:29 1997 +++ linux/Documentation/smp Tue Apr 28 14:22:04 1998 @@ -8,7 +8,7 @@ for another kernel image called "linux-smp" or something. The next time you compile the kernel, when running a SMP kernel, -edit linux/Makefile and change "MAKE=make" "MAKE=make -jN" +edit linux/Makefile and change "MAKE=make" to "MAKE=make -jN" (where N = number of CPU + 1, or if you have tons of memory/swap you can just use "-j" without a number). Feel free to experiment with this one. 
@@ -20,4 +20,4 @@ If you are using some Compaq MP compliant machines you will need to set the operating system in the BIOS settings to "Unixware" - don't ask me -why Compaq's dont work otherwise. +why Compaqs don't work otherwise. diff -u --recursive --new-file v2.1.98/linux/Documentation/smp.tex linux/Documentation/smp.tex --- v2.1.98/linux/Documentation/smp.tex Thu Jun 6 04:57:43 1996 +++ linux/Documentation/smp.tex Tue Apr 28 14:22:04 1998 @@ -39,11 +39,11 @@ supporting multiprocessing, including hardware cache coherency, built in interprocessor interrupt handling and a set of atomic test and set, exchange and similar operations. The cache coherency in particular makes the -operating systems job far easier. +operating system's job far easier. The specification defines a detailed configuration structure in ROM that the boot up processor can read to find the full configuration of the -processors and busses. It also defines a procedure for starting up the +processors and buses. It also defines a procedure for starting up the other processors. @@ -53,7 +53,7 @@ them at once and for example allocating the same memory block. There are two strategies for this within current Unix and Unixlike kernels. Traditional unix systems from the earliest of days use a scheme of 'Coarse -Grained Locking' where the entire kernel is protected as a small number of +Grained Locking' where the entire kernel is protected by a small number of locks only. Some modern systems use fine grained locking. Because fine grained locking has more overhead it is normally used only on multiprocessor kernels and real time kernels. In a real time kernel the @@ -64,7 +64,7 @@ kernel mode will be pre-empted by another kernel mode process unless it voluntarily sleeps. This ensures that blocks of kernel code are effectively atomic with respect to other processes and greatly simplifies -many operation. Secondly interrupts may pre-empt a kernel running process, +many operations. 
Secondly interrupts may pre-empt a kernel running process, but will always return to that process. A process in kernel mode may disable interrupts on the processor and guarantee such an interruption will not occur. The final guarantee is that an interrupt will not be pre-empted @@ -124,7 +124,7 @@ probably need to be modified in existing kernels to cope with this. -Each additional CPU the calls the architecture specific function +Each additional CPU then calls the architecture specific function {\tt \bf void smp\_callin(void)} @@ -142,7 +142,7 @@ \subsubsection{Scheduling} -The kernel scheduler implements a simple but very and effective task +The kernel scheduler implements a simple but very effective task scheduler. The basic structure of this scheduler is unchanged in the multiprocessor kernel. A processor field is added to each task, and this maintains the number of the processor executing a given task, or a magic @@ -185,7 +185,7 @@ {\tt \bf int smp\_processor\_id(void) } -which returns the identity of the process the call is executed upon. This +which returns the identity of the processor the call is executed upon. This call is assumed to be valid at all times. This may mean additional tests are needed during initialisation. @@ -203,7 +203,7 @@ \subsection{Architecture Specific Code For the Intel MP Port} -The architecture specific code for the intel port splits fairly cleanly +The architecture specific code for the Intel port splits fairly cleanly into four sections. Firstly the initialisation code used to boot the system, secondly the message handling and support code, thirdly the interrupt and kernel syscall entry function handling and finally the @@ -286,7 +286,7 @@ causes a specific deadlock problem. The lock owner may need to send an invalidate request to the rest of the processors and wait for these to complete before continuing. A processor spinning on the lock would not be -able to do thus. 
Thus the loop of the spinlock tests and handles invalidate +able to do this. Thus the loop of the spinlock tests and handles invalidate requests. If the invalidate bit for the spinning CPU is set the processor invalidates its TLB and atomically clears the bit. When the spinlock is obtained that processor will take an IPI and in the IPI test the bit and @@ -341,6 +341,6 @@ The /proc filesystem support is changed so that the /proc/cpuinfo file contains a column for each processor present. This information is extracted -from the data save by smp\_store\_cpu\_info(). +from the data saved by smp\_store\_cpu\_info(). \end{document} diff -u --recursive --new-file v2.1.98/linux/Documentation/sound/AWE32 linux/Documentation/sound/AWE32 --- v2.1.98/linux/Documentation/sound/AWE32 Wed Apr 8 19:36:24 1998 +++ linux/Documentation/sound/AWE32 Tue Apr 28 14:22:04 1998 @@ -18,7 +18,7 @@ alias char-major-14 sb post-install sb modprobe "-k" "adlib_card" options sb io=0x220 irq=5 dma=1 dma16=5 mpu_io=0x330 -options adlib_card io=0x388 # FM synthetiser +options adlib_card io=0x388 # FM synthesiser and then these two commands can be issued: diff -u --recursive --new-file v2.1.98/linux/Documentation/sound/Opti linux/Documentation/sound/Opti --- v2.1.98/linux/Documentation/sound/Opti Wed Apr 8 19:36:24 1998 +++ linux/Documentation/sound/Opti Tue Apr 28 14:22:04 1998 @@ -24,7 +24,7 @@ Compiling the sound driver -------------------------- I highly recommend that you build a modularized sound driver. -This document does not cover sound-driver which is built in +This document does not cover a sound-driver which is built in the kernel. Sound card support should be enabled as a module (chose m). @@ -95,7 +95,7 @@ contains only common code which is needed by all the sound drivers, and the driver for /dev/sndstat. -The sound module in it's turn will request loading of a sub-driver +The sound module in its turn will request loading of a sub-driver for mixer, audio, midi or synthesizer device. 
The first 3 are supported by the mad16 driver. The synth device is supported by the opl3 driver. @@ -105,7 +105,7 @@ options sb mad16=1 -This is left for historical reason. If you enable the +This is left for historical reasons. If you enable the config option 'Support MIDI in older MAD16 based cards (requires SB)' or if you use an older mad16 driver it will force loading of the SoundBlaster driver. This option tells the SB driver not to look @@ -154,11 +154,11 @@ the mad16 driver (use "modprobe mad16" to prevent auto-unloading) before the cdrom is accessed the first time. -Using the sound driver built-in the kernel may help here. but... +Using the sound driver built-in to the kernel may help here, but... Most new systems have a PnP bios and also two IDE controllers. The IDE controller on the sound card may be needed only on older systems (which have only one IDE controller) but these systems -also do not have a PnP bios - requiring isapnptoosl and a modularized +also do not have a PnP bios - requiring isapnptools and a modularized driver. Known problems @@ -167,8 +167,8 @@ 2. On my system the codec cannot capture companded sound samples. (eg., recording from /dev/audio). When any companded capture is - requested I get a stereo-16 bit samples instead. Playback of - companded samples work well. Apparently this problem is not common + requested I get stereo-16 bit samples instead. Playback of + companded samples works well. Apparently this problem is not common to all C931 based cards. I do not know how to identify cards that have this problem. diff -u --recursive --new-file v2.1.98/linux/Documentation/sound/Soundblaster linux/Documentation/sound/Soundblaster --- v2.1.98/linux/Documentation/sound/Soundblaster Thu Mar 26 15:57:02 1998 +++ linux/Documentation/sound/Soundblaster Tue Apr 28 14:22:04 1998 @@ -4,7 +4,7 @@ insmod sb ... This loads the driver for the soundblaster and assorted clones. 
Cards that -are covered by other drivers should not be using with this driver. +are covered by other drivers should not be using this driver. The soundblaster module takes the following arguments @@ -34,5 +34,5 @@ Avance Logic ALS007 -This card isnt currently supported. I have patches to merge however that +This card isn't currently supported. I have patches to merge however that add limited support. diff -u --recursive --new-file v2.1.98/linux/Documentation/sound/mwave linux/Documentation/sound/mwave --- v2.1.98/linux/Documentation/sound/mwave Wed Apr 8 19:36:24 1998 +++ linux/Documentation/sound/mwave Tue Apr 28 14:22:04 1998 @@ -36,7 +36,7 @@ BootGUI=0 [Note msdos.sys IS a text file but it needs to be 'unhidden' and make - read-writable before it can be eddited] + read-writable before it can be edited] Edit Config .sys to have multiple config menus. I have one for win95, and five for linux. Like this: @@ -123,7 +123,7 @@ # c:\linux\boot\zImage.krn # first value must be the filename of the Linux-kernel root=/dev/hda3 # the device which gets mounted as root FS -ro # Other kernel agruments go here +ro # Other kernel arguments go here apm=off doc=yes 3 @@ -170,7 +170,7 @@ Reboot to Win95 and choose Linux. When booted, use sndconfig to configure the sound modules and VOILA - ThinkPad sound with Linux. -Now the gottchas - You can either have CD sound OR Mixers but not both. Thats a +Now the gotchas - You can either have CD sound OR Mixers but not both. That's a problem with the SB1.5(CD sound) or SBPRO(Mixers) settings. No-one knows why this is! diff -u --recursive --new-file v2.1.98/linux/Documentation/sound/ultrasound linux/Documentation/sound/ultrasound --- v2.1.98/linux/Documentation/sound/ultrasound Tue Apr 14 14:29:19 1998 +++ linux/Documentation/sound/ultrasound Tue Apr 28 14:22:04 1998 @@ -3,7 +3,7 @@ insmod ad1848 insmod gus io=* irq=* dma=* ... -This loads the driver for the Gravis Ultrasound familily of soundcards. 
+This loads the driver for the Gravis Ultrasound family of soundcards. The gus modules takes the following arguments @@ -22,7 +22,7 @@ This option defaults to a value of 0, which allows the Ultrasound wavetable DSP to use DMA for for playback and downloading samples. This is the same as the old behaviour. If set to 1, no DMA is needed for downloading samples, -and allows owners of a GUS MAX to make use of simultanious digital audio +and allows owners of a GUS MAX to make use of simultaneous digital audio (/dev/dsp), MIDI, and wavetable playback. diff -u --recursive --new-file v2.1.98/linux/Documentation/specialix.txt linux/Documentation/specialix.txt --- v2.1.98/linux/Documentation/specialix.txt Tue Dec 2 09:19:03 1997 +++ linux/Documentation/specialix.txt Tue Apr 28 14:22:04 1998 @@ -44,10 +44,10 @@ kernel sources? And the manual of one of the boards in your computer? -Adresses and interrupts -======================= +Addresses and interrupts +======================== -Addres dip switch settings: +Address dip switch settings: The dip switch sets bits 2-9 of the IO address. 9 8 7 6 5 4 3 2 @@ -111,7 +111,7 @@ fact is a divided by two mode). This is not enough to reach the rated 115k2 on all ports at the same time. With this clock rate you can only do 37% of this rate. This means that at 115k2 on all ports you are -going to loose characters (The chip cannot handle that many incoming +going to lose characters (The chip cannot handle that many incoming bits at this clock rate.) (Yes, you read that correctly: there is a limit to the number of -=bits=- per second that the chip can handle.) @@ -129,7 +129,7 @@ The three characters that have the "^^^" under them have suffered a bit error in the highest bit. In conclusion: I've tested it, and found -that it simply DOESN"T work for me. I also suspect that this is also +that it simply DOESN'T work for me. I also suspect that this is also caused by the baud rate being just a little bit out of tune. 
diff -u --recursive --new-file v2.1.98/linux/Documentation/spinlocks.txt linux/Documentation/spinlocks.txt --- v2.1.98/linux/Documentation/spinlocks.txt Mon Jan 12 14:46:16 1998 +++ linux/Documentation/spinlocks.txt Tue Apr 28 14:22:05 1998 @@ -6,7 +6,7 @@ > SMP safe as well as UP safe during interrupts and other manipulating > routines. So far, I've added a spin_lock variable to things like my queue > structs. Now, from what I recall, there are some spin lock functions I can -> use to lock these spin locks frmo other use as oppossed to a (nasty) +> use to lock these spin locks from other use as opposed to a (nasty) > save_flags(); cli(); stuff; restore_flags(); construct. Where do I find > these routines and go about making use of them? Do they only lock on a > per-processor basis or can they also lock say an interrupt routine from @@ -25,7 +25,7 @@ ... critical section here .. spin_unlock_irqrestore(&xxx_lock, flags); -and the above is always safe. It will disable interrupt _locally_, but the +and the above is always safe. It will disable interrupts _locally_, but the spinlock itself will guarantee the global lock, so it will guarantee that there is only one thread-of-control within the region(s) protected by that lock. diff -u --recursive --new-file v2.1.98/linux/Documentation/stallion.txt linux/Documentation/stallion.txt --- v2.1.98/linux/Documentation/stallion.txt Thu Mar 26 15:57:02 1998 +++ linux/Documentation/stallion.txt Tue Apr 28 14:22:05 1998 @@ -94,7 +94,7 @@ When the new kernel is booted, or the loadable module loaded then the driver will emit some kernel trace messages about whether the configured -boards where detected or not. Depending on how your system logger is set +boards were detected or not. Depending on how your system logger is set up these may come out on the console, or just be logged to /var/adm/messages. You should check the messages to confirm that all is well. 
@@ -141,7 +141,7 @@ The higher than 1Mb memory addresses are fully supported by this driver. Just enter the address as you normally would for a lower than 1Mb address -(in the drivers board configuration structure). +(in the driver's board configuration structure). @@ -165,7 +165,7 @@ The intelligent boards also need to have their "firmware" code downloaded to them. This is done via a user level application supplied in the driver -utility package called "stlload". Compile this program where ever you dropped +utility package called "stlload". Compile this program wherever you dropped the package files, by typing "make". In its simplest form you can then type ./stlload -i cdk.sys in this directory and that will download board 0 (assuming board 0 is an @@ -220,7 +220,7 @@ Since this driver tries to emulate the standard serial ports as much as possible, most system utilities should work as they do for the standard -COM ports. Most importantly "stty" works as expected and "setserial" can be +COM ports. Most importantly "stty" works as expected and "setserial" can also be used (excepting the ability to auto-configure the I/O and IRQ addresses of boards). Higher baud rates are supported in the usual fashion through setserial or using the CBAUDEX extensions. Note that the EasyIO and diff -u --recursive --new-file v2.1.98/linux/Documentation/svga.txt linux/Documentation/svga.txt --- v2.1.98/linux/Documentation/svga.txt Sat Nov 29 10:33:18 1997 +++ linux/Documentation/svga.txt Tue Apr 28 14:22:05 1998 @@ -5,8 +5,8 @@ 1. Intro ~~~~~~~~ This small document describes the "Video Mode Selection" feature which -allows to use various special video modes supported by the video BIOS. Due -to usage of the BIOS, the selection is limited to the boot time (before the +allows the use of various special video modes supported by the video BIOS. Due +to usage of the BIOS, the selection is limited to boot time (before the kernel decompression starts) and works only on 80X86 machines. 
The video mode to be used is selected by a kernel parameter which can be @@ -58,16 +58,16 @@ how to enable it if you really want) as it's inherently unreliable due to absolutely insane PC design. - "0 0F00 80x25" tells that the first menu item (the menu items are numbered + "0 0F00 80x25" means that the first menu item (the menu items are numbered from "0" to "9" and from "a" to "z") is a 80x25 mode with ID=0x0f00 (see the next section for a description of mode ID's). - <flashing-cursor-here> encourages you to write the item number or mode ID + <flashing-cursor-here> encourages you to enter the item number or mode ID you wish to set and press <RETURN>. If the computer complains something about -"Unknown mode ID", it tries to explain you that it isn't possible to set such +"Unknown mode ID", it is trying to tell you that it isn't possible to set such a mode. It's also possible to press only <RETURN> which leaves the current mode. - The mode list usually contains only few basic modes and some VESA modes. In + The mode list usually contains a few basic modes and some VESA modes. In case your chipset has been detected, some chipset-specific modes are shown as well (some of these might be missing or unusable on your machine as different BIOSes are often shipped with the same card and the mode numbers depend purely @@ -173,7 +173,7 @@ CONFIG_VIDEO_LOCAL - enables inclusion of "local modes" in the list. The local modes are added automatically to the beginning of the list not depending -by hardware configuration. The local modes are listed in the source text after +on hardware configuration. The local modes are listed in the source text after the "local_mode_table:" line. The comment before this line describes the format of the table (which also includes a video card name to be displayed on the top of the menu). @@ -201,7 +201,7 @@ In either case, please send me a bug report containing what _exactly_ happens and how do the configuration switches affect the behaviour of the bug.
- If you start Linux from the M$-DOS, you might also use some DOS tools for + If you start Linux from M$-DOS, you might also use some DOS tools for video mode setting. In this case, you must specify the 0x0f04 mode ("leave current settings") to Linux, because if you don't and you use any non-standard mode, Linux will switch to 80x25 automatically. diff -u --recursive --new-file v2.1.98/linux/Documentation/sysctl/kernel.txt linux/Documentation/sysctl/kernel.txt --- v2.1.98/linux/Documentation/sysctl/kernel.txt Thu Apr 23 20:21:27 1998 +++ linux/Documentation/sysctl/kernel.txt Tue Apr 28 14:22:05 1998 @@ -25,7 +25,6 @@ - inode-max - inode-nr - inode-state -- kmod_unload_delay ==> Documentation/kmod.txt - modprobe ==> Documentation/kmod.txt - osrelease - ostype @@ -44,7 +43,7 @@ sent to the init(1) program to handle a graceful restart. When, however, the value is > 0, Linux's reaction to a Vulcan Nerve Pinch (tm) will be an immediate reboot, without even -syncing it's dirty buffers. +syncing its dirty buffers. Note: when a program (like dosemu) has the keyboard in 'raw' mode, the ctrl-alt-del is intercepted by the program before it @@ -69,7 +68,7 @@ Dentries are dynamically allocated and deallocated, and nr_dentry seems to be 0 all the time. Hence it's safe to assume that only nr_unused, age_limit and want_pages are -used. Nr_unused seems to be exactly what it's name says. +used. Nr_unused seems to be exactly what its name says. Age_limit is the age in seconds after which dcache entries can be reclaimed when memory is short and want_pages is nonzero when shrink_dcache_pages() has been called and the @@ -102,7 +101,7 @@ file handles, the number of used file handles and the maximum number of file handles. When the allocated filehandles come close to the maximum, but the number of actually used ones is -far behind, you've encountered a peek in your filehandle usage +far behind, you've encountered a peak in your filehandle usage and you don't need to increase the maximum. 
============================================================== @@ -113,7 +112,7 @@ dynamically, but can't free them yet... The value in inode-max denotes the maximum number of inode -handlers. This value should be 3-4 times larger as the value +handlers. This value should be 3-4 times larger than the value in file-max, since stdin, stdout and network sockets also need an inode struct to handle them. When you regularly run out of inodes, you need to increase this value. @@ -127,7 +126,7 @@ Nr_inodes stands for the number of inodes the system has allocated, this can be slightly more than inode-max because -Linux allocates them one pagefull at a time. +Linux allocates them one pageful at a time. Nr_free_inodes represents the number of free inodes (?) and preshrink is nonzero when the nr_inodes > inode-max and the diff -u --recursive --new-file v2.1.98/linux/Documentation/sysctl/vm.txt linux/Documentation/sysctl/vm.txt --- v2.1.98/linux/Documentation/sysctl/vm.txt Thu Apr 23 20:21:27 1998 +++ linux/Documentation/sysctl/vm.txt Tue Apr 28 14:22:05 1998 @@ -62,7 +62,7 @@ to a clean buffer, which can just be forgotten about). Setting this to a high value means that Linux can delay disk writes for a long time, but it also means that it will have -to do a lot I/O at once when memory becomes short. A low +to do a lot of I/O at once when memory becomes short. A low value will spread out disk I/O more evenly. The second parameter (ndirty) gives the maximum number of @@ -94,7 +94,8 @@ The three values in this file correspond to the values in the struct buffer_mem. It controls how much memory should -be used for buffer memory. +be used for buffer memory. The percentage is calculated +as a percentage of total system memory. The values are: min_percent -- this is the minumum percentage of memory @@ -111,29 +112,9 @@ This file contains the values in the struct freepages. That struct contains three members: min, low and high. 
-These numbers are used by the VM subsystem to keep a reasonable -number of pages on the free page list, so that programs can -allocate new pages without having to wait for the system to -free used pages first. The actual freeing of pages is done -by kswapd, a kernel daemon. - -min -- when the number of free pages reaches this - level, only the kernel can allocate memory - for _critical_ tasks only -low -- when the number of free pages drops below - this level, kswapd is woken up immediately -high -- this is kswapd's target, when more than <high> - pages are free, kswapd will stop swapping. - -When the number of free pages is between low and high, -and kswapd hasn't run for swapout_interval jiffies, then -kswapd is woken up too. See swapout_interval for more info. - -When free memory is always low on your system, and kswapd has -trouble keeping up with allocations, you might want to -increase these values, especially high and perhaps low. -I've found that a 1:2:4 relation for these values tend to work -rather well in a heavily loaded system. +These variables are currently unused (?), but they're +very likely to be abused for something else in the near +future, so don't yet remove it from the source... ============================================================== @@ -209,23 +190,23 @@ } swap_control_v5; -------------------------------------------------------------- -The first four variables are used to keep track of Linux' +The first four variables are used to keep track of Linux's page aging. Page aging is a bookkeeping method to keep track of which pages of memory are used often, and which pages can be swapped out without consequences.
When a page is swapped in, it starts at sc_page_initial_age -(default 3) and when the page is scanned by kswapd, it's age +(default 3) and when the page is scanned by kswapd, its age is adjusted according to the following scheme: -- if the page was used since the last time we scanned, it's - age is increased sc_page_advance (default 3) up to a maximum +- if the page was used since the last time we scanned, its + age is increased by sc_page_advance (default 3) up to a maximum of sc_max_page_age (default 20) -- else (it wasn't used) it's age is decreased sc_page_decline +- else (it wasn't used) its age is decreased by sc_page_decline (default 1) And when a page reaches age 0, it's ready to be swapped out. The next four variables can be used to control kswapd's -agressiveness in swapping out pages. +aggressiveness in swapping out pages. sc_age_cluster_fract is used to calculate how many pages from a process are to be scanned by kswapd. The formula used is @@ -236,10 +217,10 @@ also scan small processes. The values of sc_pageout_weight and sc_bufferout_weight are -used to control the how many tries kswapd will do in order +used to control how many tries kswapd will make in order to swapout one page / buffer. These values can be used to finetune the ratio between user pages and buffer/cache memory. -When you find that your Linux system is swapping out too much +When you find that your Linux system is swapping out too many process pages in order to satisfy buffer memory demands, you might want to either increase sc_bufferout_weight, or decrease the value of sc_pageout_weight. diff -u --recursive --new-file v2.1.98/linux/Documentation/transname.txt linux/Documentation/transname.txt --- v2.1.98/linux/Documentation/transname.txt Tue Mar 10 10:03:30 1998 +++ linux/Documentation/transname.txt Tue Apr 28 14:22:05 1998 @@ -14,8 +14,8 @@ This duplication causes very large efforts in practise, since at least the /etc directory has to be duplicated for every client. 
Even in /etc many files are identical, for example sendmail.cf, initrc scripts and -others. Maintaining a large pool means to ensure coherence amoung the -duplicates. Classical methods like symlinks are unconvenient +others. Maintaining a large pool requires means to ensure coherence among +the duplicates. Classical methods like symlinks are inconvenient for this task because they have to be valid in the view of mounted filesystems at all clients, not at the server. @@ -28,17 +28,17 @@ file /etc/config (without the #...=...# suffix). On host "myclient", the corresponding other file will appear as /etc/config. So you can access the right file contents under the _same_ name, depending -on which host you are working. +on which host you are working on. -A similar concept can be found in elder HP-UX versions, but with -so-called "hidden directories" which don't allow contemporary viewing +A similar concept can be found in older HP-UX versions, but with +so-called "hidden directories" which don't allow contemporary viewing of all versions by default. In contrast, transname shows all context-dependent files in the dir listing and they can be edited using the fully qualified name. -Transname was developped for and is used at our Linux pool at the -University of Stuttgart with good results. Maintainance of the pool is -at a minimum, and adding new clients is a child's play. No worry with +Transname was developed for and is used at our Linux pool at the +University of Stuttgart with good results. Maintenance of the pool is +at a minimum, and adding new clients is child's play. No worry with keeping up mail configurations, newly installed tools, changed /etc/services, /etc/shells, /etc/resolv.conf and many, many others. In contrast to a sophisticated symlink solution, adding a new file to the /etc directory @@ -46,7 +46,7 @@ An example for the use of linux-2.0-transname.patch: -For example, you can make your /etc/fstab context-dependend. 
If you want +For example, you can make your /etc/fstab context-dependent. If you want to do that, you should create an /etc/fstab#ktype=default# for the server and an /etc/fstab#ktype=diskless# for all clients. This is because your clients may not yet know their own hostname when they attempt to mount @@ -54,7 +54,7 @@ "diskless" into different kernels for servers and clients. Of course, if your clients boot via bootp and know their names when mounting the root, you can use /etc/fstab#host=myclient# instead. But at least servers -booting from disk normally dont know their hostname at root mount time, +booting from disk normally don't know their hostname at root mount time, so you can mix methods and use /etc/fstab#ktype=default# for the server, /etc/fstab#ktype=diskless# for the majority of the clients and /etc/fstab#host=myclient# for some specific client, because translation @@ -73,7 +73,7 @@ Others may be added in future. -The current translation are displayed at boot time in the kernel messages +The current translations are displayed at boot time in the kernel messages for easier debugging, and can be retrieved by reading /proc/sys/kernel/nametrans which is a special file containing the currently valid translations. @@ -105,7 +105,7 @@ echo "" > /proc/sys/kernel/nametrans Another drawback is that administration tools currently are not aware of -context-dependend files, so you cannot switch between contexts inside +context-dependent files, so you cannot switch between contexts inside one tool session. However, you can simulate administration sessions on the server as if they were running on some client. To do this, you have to set an environment variable NAMETRANS which has to be the @@ -131,7 +131,7 @@ process running on "mango" tries to create a file /etc/mtab, the version /etc/mtab#host=mango# is created instead (which appears in turn as hardlinked to /etc/mtab). 
Note that if you want to make /etc/fstab -context-dependend, you should execute "touch /etc/mtab#host=CREATE#" and +context-dependent, you should execute "touch /etc/mtab#host=CREATE#" and "touch /etc/mtab.tmp#host=CREATE#", because mount, umount and others running on different hosts would otherwise try to create one shared /etc/mtab which would result in a clash. Also one should execute @@ -155,11 +155,11 @@ the help texts that are associated with the transname options, they tell you further hints not mentioned in this README. Then build your kernel as usual, install it with a *new* kernel-filename, add a *new* entry to -/etc/lilo.conf and run lilo. **DONT CHANGE** any configuration files for the +/etc/lilo.conf and run lilo. **DON'T CHANGE** any configuration files for the first reboot! Just reboot the new kernel and play a little bit around with -creating context-dependend filenames in your home directory. +creating context-dependent filenames in your home directory. Try all modes including setting NAMETRANS to different values. As an example for the changes necessary on our LST-1.8-based Linux pool, @@ -247,7 +247,7 @@ #!/bin/sh exec /usr/bin/env - NAMETRANS= "`/usr/bin/env`" $0.notrans $* -Of course, that could be improved, but is a quick hack to get things work. +Of course, that could be improved, but is a quick hack to get things to work. Enjoy, diff -u --recursive --new-file v2.1.98/linux/Documentation/unicode.txt linux/Documentation/unicode.txt --- v2.1.98/linux/Documentation/unicode.txt Tue Nov 12 00:30:57 1996 +++ linux/Documentation/unicode.txt Tue Apr 28 14:22:05 1998 @@ -62,14 +62,14 @@ worth supporting. Hence I have chosen to add it to the list in the Linux Zone. -Several glyph forms for the Klingon alphabet has been proposed. +Several glyph forms for the Klingon alphabet have been proposed. 
However, since the set of symbols appear to be consistent throughout, with only the actual shapes being different, in keeping with standard Unicode practice these differences are considered font variants. Klingon has an alphabet of 26 characters, a positional numeric writing system with 10 digits, and is written left-to-right, top-to-bottom. -Punctuation appears to be only used in Latin transliteration; it is +Punctuation appears to be only used in Latin transliteration; it appears customary to write each sentence on its own line, and centered. Space has been reserved for punctuation should it prove necessary. diff -u --recursive --new-file v2.1.98/linux/Makefile linux/Makefile --- v2.1.98/linux/Makefile Sat Apr 25 18:13:10 1998 +++ linux/Makefile Mon Apr 27 16:46:58 1998 @@ -1,6 +1,6 @@ VERSION = 2 PATCHLEVEL = 1 -SUBLEVEL = 98 +SUBLEVEL = 99 ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/) diff -u --recursive --new-file v2.1.98/linux/arch/alpha/config.in linux/arch/alpha/config.in --- v2.1.98/linux/arch/alpha/config.in Wed Apr 8 19:36:24 1998 +++ linux/arch/alpha/config.in Tue Apr 28 22:41:33 1998 @@ -176,7 +176,8 @@ # if [ "$CONFIG_TGA_CONSOLE" = "y" ]; then # bool 'VGA Console Support' CONFIG_VGA_CONSOLE # fi - if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool 'PCI quirks' CONFIG_PCI_QUIRKS + if [ "$CONFIG_PCI_QUIRKS" = "y" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then bool 'PCI bridge optimization (experimental)' CONFIG_PCI_OPTIMIZE fi bool 'Backward-compatible /proc/pci' CONFIG_PCI_OLD_PROC diff -u --recursive --new-file v2.1.98/linux/arch/alpha/lib/checksum.c linux/arch/alpha/lib/checksum.c --- v2.1.98/linux/arch/alpha/lib/checksum.c Sat Nov 23 02:29:03 1996 +++ linux/arch/alpha/lib/checksum.c Tue Apr 28 22:28:10 1998 @@ -37,6 +37,27 @@ ((unsigned long) proto << 8)); } +unsigned int csum_tcpudp_nofold(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + unsigned long result; + + result 
= (saddr + daddr + sum + + ((unsigned long) ntohs(len) << 16) + + ((unsigned long) proto << 8)); + + /* Fold down to 32-bits so we don't lose in the typedef-less + network stack. */ + /* 64 to 33 */ + result = (result & 0xffffffff) + (result >> 32); + /* 33 to 32 */ + result = (result & 0xffffffff) + (result >> 32); + return result; +} + /* * Do a 64-bit checksum on an arbitrary memory area.. * diff -u --recursive --new-file v2.1.98/linux/arch/alpha/lib/copy_user.S linux/arch/alpha/lib/copy_user.S --- v2.1.98/linux/arch/alpha/lib/copy_user.S Thu Feb 6 04:42:35 1997 +++ linux/arch/alpha/lib/copy_user.S Sat Apr 25 22:35:18 1998 @@ -27,11 +27,18 @@ */ /* Allow an exception for an insn; exit if we get one. */ -#define EX(x,y...) \ +#define EXI(x,y...) \ 99: x,##y; \ .section __ex_table,"a"; \ .gprel32 99b; \ - lda $31, $exit-99b($31); \ + lda $31, $exitin-99b($31); \ + .previous + +#define EXO(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .gprel32 99b; \ + lda $31, $exitout-99b($31); \ .previous .set noat @@ -45,14 +52,14 @@ subq $3,8,$3 .align 5 $37: - EX( ldq_u $1,0($7) ) - EX( ldq_u $2,0($6) ) + EXI( ldq_u $1,0($7) ) + EXO( ldq_u $2,0($6) ) extbl $1,$7,$1 mskbl $2,$6,$2 insbl $1,$6,$1 addq $3,1,$3 bis $1,$2,$1 - EX( stq_u $1,0($6) ) + EXO( stq_u $1,0($6) ) subq $0,1,$0 addq $6,1,$6 addq $7,1,$7 @@ -63,10 +70,10 @@ bic $0,7,$4 beq $1,$43 beq $4,$48 - EX( ldq_u $3,0($7) ) + EXI( ldq_u $3,0($7) ) .align 5 $50: - EX( ldq_u $2,8($7) ) + EXI( ldq_u $2,8($7) ) subq $4,8,$4 extql $3,$7,$3 extqh $2,$7,$1 @@ -81,13 +88,13 @@ beq $0,$41 .align 5 $57: - EX( ldq_u $1,0($7) ) - EX( ldq_u $2,0($6) ) + EXI( ldq_u $1,0($7) ) + EXO( ldq_u $2,0($6) ) extbl $1,$7,$1 mskbl $2,$6,$2 insbl $1,$6,$1 bis $1,$2,$1 - EX( stq_u $1,0($6) ) + EXO( stq_u $1,0($6) ) subq $0,1,$0 addq $6,1,$6 addq $7,1,$7 @@ -98,7 +105,7 @@ beq $4,$65 .align 5 $66: - EX( ldq $1,0($7) ) + EXI( ldq $1,0($7) ) subq $4,8,$4 stq $1,0($6) addq $7,8,$7 @@ -107,15 +114,31 @@ bne $4,$66 $65: beq $0,$41 - EX( ldq
$2,0($7) ) - EX( ldq $1,0($6) ) + EXI( ldq $2,0($7) ) + EXO( ldq $1,0($6) ) mskql $2,$0,$2 mskqh $1,$0,$1 bis $2,$1,$2 - EX( stq $2,0($6) ) + EXO( stq $2,0($6) ) bis $31,$31,$0 $41: $35: -$exit: +$exitout: ret $31,($28),1 + +$exitin: + /* A stupid byte-by-byte zeroing of the rest of the output + buffer. This cures security holes by never leaving + random kernel data around to be copied elsewhere. */ + + mov $0,$1 +$101: + EXO ( ldq_u $2,0($6) ) + subq $1,1,$1 + mskbl $2,$6,$2 + EXO ( stq_u $2,0($6) ) + addq $6,1,$6 + bgt $1,$101 + ret $31,($28),1 + .end __copy_user diff -u --recursive --new-file v2.1.98/linux/arch/alpha/lib/csum_partial_copy.c linux/arch/alpha/lib/csum_partial_copy.c --- v2.1.98/linux/arch/alpha/lib/csum_partial_copy.c Fri Jan 23 18:10:31 1998 +++ linux/arch/alpha/lib/csum_partial_copy.c Tue Apr 28 22:28:10 1998 @@ -365,6 +365,12 @@ } unsigned int +csum_partial_copy_nocheck(const char *src, char *dst, int len, unsigned int sum) +{ + return do_csum_partial_copy_from_user(src, dst, len, sum, NULL); +} + +unsigned int csum_partial_copy (const char *src, char *dst, int len, unsigned int sum) { unsigned int ret; diff -u --recursive --new-file v2.1.98/linux/arch/alpha/math-emu/ieee-math.c linux/arch/alpha/math-emu/ieee-math.c --- v2.1.98/linux/arch/alpha/math-emu/ieee-math.c Tue Mar 10 10:03:30 1998 +++ linux/arch/alpha/math-emu/ieee-math.c Tue Apr 28 13:00:26 1998 @@ -733,19 +733,23 @@ * FPCR_INV if invalid operation occurred, etc. 
*/ unsigned long -ieee_CVTTQ (int f, unsigned long a, unsigned long *b) +ieee_CVTTQ (int f, unsigned long a, unsigned long *pb) { unsigned int midway; - unsigned long ov, uv, res = 0; + unsigned long ov, uv, res, b; fpclass_t a_type; EXTENDED temp; - *b = 0; a_type = extend_ieee(a, &temp, DOUBLE); + + b = 0x7fffffffffffffff; + res = FPCR_INV; if (a_type == NaN || a_type == INFTY) - return FPCR_INV; + goto out; + + res = 0; if (a_type == QNaN) - return 0; + goto out; if (temp.e > 0) { ov = 0; @@ -757,7 +761,7 @@ if (ov || (temp.f[1] & 0xffc0000000000000)) res |= FPCR_IOV | FPCR_INE; } - if (temp.e < 0) { + else if (temp.e < 0) { while (temp.e < 0) { ++temp.e; uv = temp.f[0] & 1; /* save sticky bit */ @@ -765,7 +769,8 @@ temp.f[0] |= uv; } } - *b = ((temp.f[1] << 9) | (temp.f[0] >> 55)) & 0x7fffffffffffffff; + b = (temp.f[1] << 9) | (temp.f[0] >> 55); + /* * Notice: the fraction is only 52 bits long. Thus, rounding * cannot possibly result in an integer overflow. @@ -776,18 +781,18 @@ midway = (temp.f[0] & 0x003fffffffffffff) == 0; if ((midway && (temp.f[0] & 0x0080000000000000)) || !midway) - ++*b; + ++b; } break; case ROUND_PINF: if ((temp.f[0] & 0x007fffffffffffff) != 0) - ++*b; + ++b; break; case ROUND_NINF: if ((temp.f[0] & 0x007fffffffffffff) != 0) - --*b; + --b; break; case ROUND_CHOP: @@ -798,8 +803,11 @@ res |= FPCR_INE; if (temp.s) { - *b = -*b; + b = -b; } + +out: + *pb = b; return res; } diff -u --recursive --new-file v2.1.98/linux/arch/i386/config.in linux/arch/i386/config.in --- v2.1.98/linux/arch/i386/config.in Wed Apr 8 19:36:24 1998 +++ linux/arch/i386/config.in Tue Apr 28 22:41:33 1998 @@ -17,6 +17,9 @@ Pentium/K5/5x86/6x86 CONFIG_M586 \ PPro/K6/6x86MX CONFIG_M686" Pentium bool 'Math emulation' CONFIG_MATH_EMULATION +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR +fi endmenu mainmenu_option next_comment @@ -36,7 +39,8 @@ if [ "$CONFIG_PCI" = "y" ]; then bool ' PCI BIOS support' 
CONFIG_PCI_BIOS bool ' PCI direct access support' CONFIG_PCI_DIRECT - if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool ' PCI quirks' CONFIG_PCI_QUIRKS + if [ "$CONFIG_PCI_QUIRKS" = "y" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then bool ' PCI bridge optimization (experimental)' CONFIG_PCI_OPTIMIZE fi bool ' Backward-compatible /proc/pci' CONFIG_PCI_OLD_PROC diff -u --recursive --new-file v2.1.98/linux/arch/i386/defconfig linux/arch/i386/defconfig --- v2.1.98/linux/arch/i386/defconfig Wed Apr 1 20:11:47 1998 +++ linux/arch/i386/defconfig Tue Apr 28 23:20:03 1998 @@ -15,7 +15,6 @@ CONFIG_M586=y # CONFIG_M686 is not set # CONFIG_MATH_EMULATION is not set -CONFIG_MAX_MEMSIZE=1024 # # Loadable module support @@ -31,6 +30,7 @@ CONFIG_PCI=y CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y +CONFIG_PCI_QUIRKS=y CONFIG_PCI_OLD_PROC=y # CONFIG_MCA is not set CONFIG_SYSVIPC=y diff -u --recursive --new-file v2.1.98/linux/arch/i386/kernel/Makefile linux/arch/i386/kernel/Makefile --- v2.1.98/linux/arch/i386/kernel/Makefile Tue Jan 20 12:52:09 1998 +++ linux/arch/i386/kernel/Makefile Tue Apr 28 22:41:33 1998 @@ -21,6 +21,7 @@ O_OBJS := process.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o OX_OBJS := i386_ksyms.o +MX_OBJS := ifdef CONFIG_PCI O_OBJS += bios32.o @@ -28,6 +29,14 @@ ifdef CONFIG_MCA O_OBJS += mca.o +endif + +ifeq ($(CONFIG_MTRR),y) +OX_OBJS += mtrr.o +else + ifeq ($(CONFIG_MTRR),m) + MX_OBJS += mtrr.o + endif endif diff -u --recursive --new-file v2.1.98/linux/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S --- v2.1.98/linux/arch/i386/kernel/entry.S Wed Apr 1 20:11:47 1998 +++ linux/arch/i386/kernel/entry.S Thu Apr 30 15:17:16 1998 @@ -155,7 +155,7 @@ jae badsys testb $0x20,flags(%ebx) # PF_TRACESYS jne tracesys - call SYMBOL_NAME(sys_call_table)(,%eax,4) + call *SYMBOL_NAME(sys_call_table)(,%eax,4) movl %eax,EAX(%esp) # save the return value ALIGN .globl ret_from_sys_call @@ -193,7 +193,7 @@ movl $-ENOSYS,EAX(%esp) call 
SYMBOL_NAME(syscall_trace) movl ORIG_EAX(%esp),%eax - call SYMBOL_NAME(sys_call_table)(,%eax,4) + call *SYMBOL_NAME(sys_call_table)(,%eax,4) movl %eax,EAX(%esp) # save the return value call SYMBOL_NAME(syscall_trace) jmp ret_from_sys_call diff -u --recursive --new-file v2.1.98/linux/arch/i386/kernel/i386_ksyms.c linux/arch/i386/kernel/i386_ksyms.c --- v2.1.98/linux/arch/i386/kernel/i386_ksyms.c Mon Apr 6 17:40:59 1998 +++ linux/arch/i386/kernel/i386_ksyms.c Tue Apr 28 22:41:33 1998 @@ -82,6 +82,8 @@ EXPORT_SYMBOL(__global_sti); EXPORT_SYMBOL(__global_save_flags); EXPORT_SYMBOL(__global_restore_flags); +EXPORT_SYMBOL(smp_message_pass); +EXPORT_SYMBOL(mtrr_hook); #endif #ifdef CONFIG_MCA diff -u --recursive --new-file v2.1.98/linux/arch/i386/kernel/irq.c linux/arch/i386/kernel/irq.c --- v2.1.98/linux/arch/i386/kernel/irq.c Sat Apr 25 18:13:10 1998 +++ linux/arch/i386/kernel/irq.c Thu Apr 30 13:08:47 1998 @@ -68,12 +68,6 @@ spinlock_t irq_controller_lock; -static unsigned int irq_events [NR_IRQS] = { -1, }; -static int disabled_irq [NR_IRQS] = { 0, }; -#ifdef __SMP__ -static int ipi_pending [NR_IRQS] = { 0, }; -#endif - /* * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) * boards the timer interrupt and sometimes the keyboard interrupt is @@ -126,11 +120,34 @@ }; #endif -struct hw_interrupt_type *irq_handles[NR_IRQS] = -{ - [0 ... 15] = &i8259A_irq_type /* standard ISA IRQs */ +/* + * Status: reason for being disabled: somebody has + * done a "disable_irq()" or we must not re-enter the + * already executing irq.. + */ +#define IRQ_INPROGRESS 1 +#define IRQ_DISABLED 2 + +/* + * This is the "IRQ descriptor", which contains various information + * about the irq, including what kind of hardware handling it has, + * whether it is disabled etc etc. + * + * Pad this out to 32 bytes for cache and indexing reasons. 
+ */ +typedef struct { + unsigned int status; /* IRQ status - IRQ_INPROGRESS, IRQ_DISABLED */ + unsigned int events; /* Do we have any pending events? */ + unsigned int ipi; /* Have we sent off the pending IPI? */ + struct hw_interrupt_type *handler; /* handle/enable/disable functions */ + struct irqaction *action; /* IRQ action list */ + unsigned int unused[3]; +} irq_desc_t; + +irq_desc_t irq_desc[NR_IRQS] = { + [0 ... 15] = { 0, 0, 0, &i8259A_irq_type, }, /* standard ISA IRQs */ #ifdef __SMP__ - , [16 ... NR_IRQS-1] = &ioapic_irq_type /* 'high' PCI IRQs */ + [16 ... 23] = { 0, 0, 0, &ioapic_irq_type, }, /* 'high' PCI IRQs */ #endif }; @@ -177,6 +194,7 @@ void unmask_generic_irq(unsigned int irq) { + irq_desc[irq].status = 0; if (IO_APIC_IRQ(irq)) enable_IO_APIC_irq(irq); else { @@ -243,6 +261,7 @@ BUILD_SMP_INTERRUPT(reschedule_interrupt) BUILD_SMP_INTERRUPT(invalidate_interrupt) BUILD_SMP_INTERRUPT(stop_cpu_interrupt) +BUILD_SMP_INTERRUPT(mtrr_interrupt) /* * every pentium local APIC has two 'local interrupts', with a @@ -299,17 +318,6 @@ */ static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL}; -static struct irqaction *irq_action[NR_IRQS] = { - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL -#ifdef __SMP__ - ,NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL -#endif -}; - int get_irq_list(char *buf) { int i, j; @@ -322,7 +330,7 @@ *p++ = '\n'; for (i = 0 ; i < NR_IRQS ; i++) { - action = irq_action[i]; + action = irq_desc[i].action; if (!action) continue; p += sprintf(p, "%3d: ",i); @@ -630,7 +638,7 @@ int status; status = 0; - action = *(irq + irq_action); + action = irq_desc[irq].action; if (action) { status |= 1; @@ -651,18 +659,6 @@ return status; } - -void disable_irq(unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - irq_handles[irq]->disable(irq); - spin_unlock_irqrestore(&irq_controller_lock, flags); - - synchronize_irq(); -} - /* * 
disable/enable_irq() wait for all irq contexts to finish * executing. Also it's recursive. @@ -673,61 +669,16 @@ set_8259A_irq_mask(irq); } -#ifdef __SMP__ -static void disable_ioapic_irq(unsigned int irq) -{ - disabled_irq[irq] = 1; - /* - * We do not disable IO-APIC irqs in hardware ... - */ -} -#endif - void enable_8259A_irq (unsigned int irq) { - unsigned long flags; - spin_lock_irqsave(&irq_controller_lock, flags); cached_irq_mask &= ~(1 << irq); set_8259A_irq_mask(irq); - spin_unlock_irqrestore(&irq_controller_lock, flags); -} - -#ifdef __SMP__ -void enable_ioapic_irq (unsigned int irq) -{ - unsigned long flags, should_handle_irq; - int cpu = smp_processor_id(); - - spin_lock_irqsave(&irq_controller_lock, flags); - disabled_irq[irq] = 0; - - /* - * In the SMP+IOAPIC case it might happen that there are an unspecified - * number of pending IRQ events unhandled. These cases are very rare, - * so we 'resend' these IRQs via IPIs, to the same CPU. It's much - * better to do it this way as thus we dont have to be aware of - * 'pending' interrupts in the IRQ path, except at this point. - */ - if (irq_events[irq]) { - if (!ipi_pending[irq]) { - ipi_pending[irq] = 1; - --irq_events[irq]; - send_IPI(cpu,IO_APIC_VECTOR(irq)); - } - } - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -#endif - -void enable_irq(unsigned int irq) -{ - irq_handles[irq]->enable(irq); } void make_8259A_irq (unsigned int irq) { io_apic_irqs &= ~(1<events && !desc->ipi) { + desc->ipi = 1; + send_IPI(APIC_DEST_SELF, IO_APIC_VECTOR(irq)); + } +} + +/* + * We do not actually disable IO-APIC irqs in hardware ... + */ +static void disable_ioapic_irq(unsigned int irq) +{ +} + static void do_ioapic_IRQ(unsigned int irq, int cpu, struct pt_regs * regs) { - int should_handle_irq = 0; + irq_desc_t *desc = irq_desc + irq; + + spin_lock(&irq_controller_lock); + /* Ack the irq inside the lock! 
*/ ack_APIC_irq(); + desc->ipi = 0; - spin_lock(&irq_controller_lock); - if (ipi_pending[irq]) - ipi_pending[irq] = 0; + /* If the irq is disabled for whatever reason, just set a flag and return */ + if (desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)) { + desc->events = 1; + spin_unlock(&irq_controller_lock); + return; + } - if (!irq_events[irq]++ && !disabled_irq[irq]) - should_handle_irq = 1; + desc->status = IRQ_INPROGRESS; + desc->events = 0; hardirq_enter(cpu); spin_unlock(&irq_controller_lock); - if (should_handle_irq) { - while (test_bit(0,&global_irq_lock)) mb(); -again: - handle_IRQ_event(irq, regs); + while (test_bit(0,&global_irq_lock)) barrier(); + + for (;;) { + int pending; + + /* If there is no IRQ handler, exit early, leaving the irq "in progress" */ + if (!handle_IRQ_event(irq, regs)) + goto no_handler; spin_lock(&irq_controller_lock); - should_handle_irq=0; - if (--irq_events[irq] && !disabled_irq[irq]) - should_handle_irq=1; + pending = desc->events; + desc->events = 0; + if (!pending) + break; spin_unlock(&irq_controller_lock); - - if (should_handle_irq) - goto again; } + desc->status &= IRQ_DISABLED; + spin_unlock(&irq_controller_lock); +no_handler: hardirq_exit(cpu); release_irqlock(cpu); } + #endif + +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version for the actual + * hardware disable after having gotten the irq + * controller lock. + */ +void disable_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&irq_controller_lock, flags); + /* + * At this point we may actually have a pending interrupt being active + * on another CPU. So don't touch the IRQ_INPROGRESS bit.. 
+ */ + irq_desc[irq].status |= IRQ_DISABLED; + irq_desc[irq].handler->disable(irq); + spin_unlock_irqrestore(&irq_controller_lock, flags); + + synchronize_irq(); +} + +void enable_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&irq_controller_lock, flags); + /* + * In contrast to the above, we should _not_ have any concurrent + * interrupt activity here, so we just clear both disabled bits. + * + * This allows us to have IRQ_INPROGRESS set until we actually + * install a handler for this interrupt (make irq autodetection + * work by just looking at the status field for the irq) + */ + irq_desc[irq].status = 0; + irq_desc[irq].handler->enable(irq); + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -836,7 +867,7 @@ int cpu = smp_processor_id(); kstat.irqs[cpu][irq]++; - irq_handles[irq]->handle(irq, cpu, ®s); + irq_desc[irq].handler->handle(irq, cpu, ®s); /* * This should be conditional: we should really get @@ -856,7 +887,7 @@ struct irqaction *old, **p; unsigned long flags; - p = irq_action + irq; + p = &irq_desc[irq].action; if ((old = *p) != NULL) { /* Can't share interrupts unless both agree to */ if (!(old->flags & new->flags & SA_SHIRQ)) @@ -881,7 +912,7 @@ spin_lock(&irq_controller_lock); #ifdef __SMP__ if (IO_APIC_IRQ(irq)) { - irq_handles[irq] = &ioapic_irq_type; + irq_desc[irq].handler = &ioapic_irq_type; /* * First disable it in the 8259A: */ @@ -939,7 +970,7 @@ printk("Trying to free IRQ%d\n",irq); return; } - for (p = irq + irq_action; (action = *p) != NULL; p = &action->next) { + for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { if (action->dev_id != dev_id) continue; @@ -955,32 +986,29 @@ } /* - * probing is always single threaded [FIXME: is this true?] + * IRQ autodetection code.. 
+ * + * This depends on the fact that any interrupt that + * comes in on to an unassigned handler will get stuck + * with "IRQ_INPROGRESS" asserted and the interrupt + * disabled. */ -static unsigned int probe_irqs[NR_CPUS][NR_IRQS]; - unsigned long probe_irq_on (void) { - unsigned int i, j, irqs = 0; + unsigned int i, irqs = 0; unsigned long delay; /* - * save current irq counts - */ - memcpy(probe_irqs,kstat.irqs,NR_CPUS*NR_IRQS*sizeof(int)); - - /* * first, enable any unassigned irqs */ + spin_lock_irq(&irq_controller_lock); for (i = NR_IRQS-1; i > 0; i--) { - if (!irq_action[i]) { - unsigned long flags; - spin_lock_irqsave(&irq_controller_lock, flags); + if (!irq_desc[i].action) { unmask_generic_irq(i); irqs |= (1 << i); - spin_unlock_irqrestore(&irq_controller_lock, flags); } } + spin_unlock_irq(&irq_controller_lock); /* * wait for spurious interrupts to increase counters @@ -991,35 +1019,35 @@ /* * now filter out any obviously spurious interrupts */ - for (i=0; i>= 1; } if (irq_found == -1) irq_found = 0; out: + spin_unlock_irq(&irq_controller_lock); return irq_found; } @@ -1041,7 +1069,7 @@ for (i = 0; i < NR_IRQS ; i++) if (IO_APIC_VECTOR(i) <= 0xfe) /* HACK */ { if (IO_APIC_IRQ(i)) { - irq_handles[i] = &ioapic_irq_type; + irq_desc[i].handler = &ioapic_irq_type; /* * First disable it in the 8259A: */ @@ -1063,8 +1091,8 @@ outb(LATCH >> 8 , 0x40); /* MSB */ for (i=0; i + Initial register-setting code (from proform-1.0). + 19971216 Richard Gooch + Original version for /proc/mtrr interface, SMP-safe. + v1.0 + 19971217 Richard Gooch + Bug fix for ioctls()'s. + Added sample code in Documentation/mtrr.txt + v1.1 + 19971218 Richard Gooch + Disallow overlapping regions. + 19971219 Jens Maurer + Register-setting fixups. + v1.2 + 19971222 Richard Gooch + Fixups for kernel 2.1.75. + v1.3 + 19971229 David Wragg + Register-setting fixups and conformity with Intel conventions. 
+ 19971229 Richard Gooch + Cosmetic changes and wrote this ChangeLog ;-) + 19980106 Richard Gooch + Fixups for kernel 2.1.78. + v1.4 + 19980119 David Wragg + Included passive-release enable code (elsewhere in PCI setup). + v1.5 + 19980131 Richard Gooch + Replaced global kernel lock with private spinlock. + v1.6 + 19980201 Richard Gooch + Added wait for other CPUs to complete changes. + v1.7 + 19980202 Richard Gooch + Bug fix in definition of for UP. + v1.8 + 19980319 Richard Gooch + Fixups for kernel 2.1.90. + 19980323 Richard Gooch + Move SMP BIOS fixup before secondary CPUs call + v1.9 + 19980325 Richard Gooch + Fixed test for overlapping regions: confused by adjacent regions + 19980326 Richard Gooch + Added wbinvd in . + 19980401 Richard Gooch + Bug fix for non-SMP compilation. + 19980418 David Wragg + Fixed-MTRR synchronisation for SMP and use atomic operations + instead of spinlocks. + 19980418 Richard Gooch + Differentiate different MTRR register classes for BIOS fixup. + v1.10 + 19980419 David Wragg + Bug fix in variable MTRR synchronisation. + v1.11 + 19980419 Richard Gooch + Fixups for kernel 2.1.97. + v1.12 + 19980421 Richard Gooch + Safer synchronisation across CPUs when changing MTRRs. + v1.13 + 19980423 Richard Gooch + Bugfix for SMP systems without MTRR support. + v1.14 + 19980427 Richard Gooch + Trap calls to and on non-MTRR machines. + v1.15 + 19980427 Richard Gooch + Use atomic bitops for setting SMP change mask. + v1.16 + 19980428 Richard Gooch + Removed spurious diagnostic message. + v1.17 + 19980429 Richard Gooch + Moved register-setting macros into this file. + Moved setup code from init/main.c to i386-specific areas. 
+ v1.18 +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define MTRR_NEED_STRINGS +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MTRR_VERSION "1.18 (19980429)" + +#define TRUE 1 +#define FALSE 0 + +#define X86_FEATURE_MTRR 0x1000 /* memory type registers */ + +#define MTRRcap_MSR 0x0fe +#define MTRRdefType_MSR 0x2ff + +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) + +#define NUM_FIXED_RANGES 88 +#define MTRRfix64K_00000_MSR 0x250 +#define MTRRfix16K_80000_MSR 0x258 +#define MTRRfix16K_A0000_MSR 0x259 +#define MTRRfix4K_C0000_MSR 0x268 +#define MTRRfix4K_C8000_MSR 0x269 +#define MTRRfix4K_D0000_MSR 0x26a +#define MTRRfix4K_D8000_MSR 0x26b +#define MTRRfix4K_E0000_MSR 0x26c +#define MTRRfix4K_E8000_MSR 0x26d +#define MTRRfix4K_F0000_MSR 0x26e +#define MTRRfix4K_F8000_MSR 0x26f + +#ifdef __SMP__ +# define MTRR_CHANGE_MASK_FIXED 0x01 +# define MTRR_CHANGE_MASK_VARIABLE 0x02 +# define MTRR_CHANGE_MASK_DEFTYPE 0x04 +#endif + +/* In the processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef u8 mtrr_type; + +#define LINE_SIZE 80 +#define JIFFIE_TIMEOUT 100 + +#ifdef __SMP__ +# define set_mtrr(reg,base,size,type) set_mtrr_smp (reg, base, size, type) +#else +# define set_mtrr(reg,base,size,type) set_mtrr_up (reg, base, size, type,TRUE) +#endif + +#ifndef CONFIG_PROC_FS +# define compute_ascii() while (0) +#endif + +#ifdef CONFIG_PROC_FS +static char *ascii_buffer = NULL; +static unsigned int ascii_buf_bytes = 0; +#endif +static unsigned int *usage_table = NULL; +#ifdef __SMP__ +static spinlock_t main_lock = SPIN_LOCK_UNLOCKED; +#endif + +/* Private functions */ +#ifdef CONFIG_PROC_FS +static void compute_ascii (void); +#endif + + +struct set_mtrr_context +{ + unsigned long 
flags; + unsigned long deftype_lo; + unsigned long deftype_hi; + unsigned long cr4val; +}; + +/* + * Access to machine-specific registers (available on 586 and better only) + * Note: the rd* operations modify the parameters directly (without using + * pointer indirection), this allows gcc to optimize better + */ +#define rdmsr(msr,val1,val2) \ + __asm__ __volatile__("rdmsr" \ + : "=a" (val1), "=d" (val2) \ + : "c" (msr)) + +#define wrmsr(msr,val1,val2) \ + __asm__ __volatile__("wrmsr" \ + : /* no outputs */ \ + : "c" (msr), "a" (val1), "d" (val2)) + +#define rdtsc(low,high) \ + __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) + +#define rdpmc(counter,low,high) \ + __asm__ __volatile__("rdpmc" \ + : "=a" (low), "=d" (high) \ + : "c" (counter)) + + +/* Put the processor into a state where MTRRs can be safely set. */ +static void set_mtrr_prepare(struct set_mtrr_context *ctxt) +{ + unsigned long tmp; + + /* disable interrupts */ + save_flags(ctxt->flags); cli(); + + /* save value of CR4 and clear Page Global Enable (bit 7) */ + asm volatile ("movl %%cr4, %0\n\t" + "movl %0, %1\n\t" + "andb $0x7f, %b1\n\t" + "movl %1, %%cr4\n\t" + : "=r" (ctxt->cr4val), "=q" (tmp) : : "memory"); + + /* disable and flush caches. Note that wbinvd flushes the TLBs as + a side-effect. */ + asm volatile ("movl %%cr0, %0\n\t" + "orl $0x40000000, %0\n\t" + "wbinvd\n\t" + "movl %0, %%cr0\n\t" + "wbinvd\n\t" + : "=r" (tmp) : : "memory"); + + /* disable MTRRs, and set the default type to uncached. 
*/ + rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi); +} /* End Function set_mtrr_prepare */ + + +/* Restore the processor after a set_mtrr_prepare */ +static void set_mtrr_done(struct set_mtrr_context *ctxt) +{ + unsigned long tmp; + + /* flush caches and TLBs */ + asm volatile ("wbinvd" : : : "memory" ); + + /* restore MTRRdefType */ + wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + + /* enable caches */ + asm volatile ("movl %%cr0, %0\n\t" + "andl $0xbfffffff, %0\n\t" + "movl %0, %%cr0\n\t" + : "=r" (tmp) : : "memory"); + + /* restore value of CR4 */ + asm volatile ("movl %0, %%cr4" + : : "r" (ctxt->cr4val) : "memory"); + + /* re-enable interrupts (if enabled previously) */ + restore_flags(ctxt->flags); +} /* End Function set_mtrr_done */ + + +/* this function returns the number of variable MTRRs */ +static unsigned int get_num_var_ranges (void) +{ + unsigned long config, dummy; + + rdmsr(MTRRcap_MSR, config, dummy); + return (config & 0xff); +} /* End Function get_num_var_ranges */ + + +/* non-zero if we have the write-combining memory type. */ +static int have_wrcomb (void) +{ + unsigned long config, dummy; + + rdmsr(MTRRcap_MSR, config, dummy); + return (config & (1<<10)); +} + + +static void get_mtrr (unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type *type) +{ + unsigned long dummy, mask_lo, base_lo; + + rdmsr(MTRRphysMask_MSR(reg), mask_lo, dummy); + if ((mask_lo & 0x800) == 0) { + /* Invalid (i.e. free) range. */ + *base = 0; + *size = 0; + *type = 0; + return; + } + + rdmsr(MTRRphysBase_MSR(reg), base_lo, dummy); + + /* We ignore the extra address bits (32-35). If someone wants to + run x86 Linux on a machine with >4GB memory, this will be the + least of their problems. */ + + /* Clean up mask_lo so it gives the real address mask. */ + mask_lo = (mask_lo & 0xfffff000UL); + + /* This works correctly if size is a power of two, i.e. 
a + contiguous range. */ + *size = ~(mask_lo - 1); + + *base = (base_lo & 0xfffff000UL); + *type = (base_lo & 0xff); +} /* End Function get_mtrr */ + + +static void set_mtrr_up (unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type, int do_safe) +/* [SUMMARY] Set variable MTRR register on the local CPU. + The register to set. + The base address of the region. + The size of the region. If this is 0 the region is disabled. + The type of the region. + If TRUE, do the change safely. If FALSE, safety measures should + be done externally. +*/ +{ + struct set_mtrr_context ctxt; + + if (do_safe) set_mtrr_prepare (&ctxt); + if (size == 0) + { + /* The invalid bit is kept in the mask, so we simply clear the + relevant mask register to disable a range. */ + wrmsr (MTRRphysMask_MSR (reg), 0, 0); + } + else + { + wrmsr (MTRRphysBase_MSR (reg), base | type, 0); + wrmsr (MTRRphysMask_MSR (reg), ~(size - 1) | 0x800, 0); + } + if (do_safe) set_mtrr_done (&ctxt); +} /* End Function set_mtrr_up */ + + +#ifdef __SMP__ + +struct mtrr_var_range +{ + unsigned long base_lo; + unsigned long base_hi; + unsigned long mask_lo; + unsigned long mask_hi; +}; + + +/* Get the MSR pair relating to a var range. */ +__initfunc(static void get_mtrr_var_range (unsigned int index, + struct mtrr_var_range *vr)) +{ + rdmsr (MTRRphysBase_MSR (index), vr->base_lo, vr->base_hi); + rdmsr (MTRRphysMask_MSR (index), vr->mask_lo, vr->mask_hi); +} /* End Function get_mtrr_var_range */ + + +/* Set the MSR pair relating to a var range. Returns TRUE if + changes are made. 
*/ +__initfunc(static int set_mtrr_var_range_testing (unsigned int index, + struct mtrr_var_range *vr)) +{ + unsigned int lo, hi; + int changed = FALSE; + + rdmsr(MTRRphysBase_MSR(index), lo, hi); + + if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) + || (vr->base_hi & 0xfUL) != (hi & 0xfUL)) { + wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); + changed = TRUE; + } + + rdmsr(MTRRphysMask_MSR(index), lo, hi); + + if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL) + || (vr->mask_hi & 0xfUL) != (hi & 0xfUL)) { + wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); + changed = TRUE; + } + + return changed; +} + + +__initfunc(static void get_fixed_ranges(mtrr_type *frs)) +{ + unsigned long *p = (unsigned long *)frs; + int i; + + rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); + + for (i = 0; i < 2; i++) + rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i*2], p[3 + i*2]); + + for (i = 0; i < 8; i++) + rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i*2], p[7 + i*2]); +} + + +__initfunc(static int set_fixed_ranges_testing(mtrr_type *frs)) +{ + unsigned long *p = (unsigned long *)frs; + int changed = FALSE; + int i; + unsigned long lo, hi; + + rdmsr(MTRRfix64K_00000_MSR, lo, hi); + if (p[0] != lo || p[1] != hi) { + wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]); + changed = TRUE; + } + + for (i = 0; i < 2; i++) { + rdmsr(MTRRfix16K_80000_MSR + i, lo, hi); + if (p[2 + i*2] != lo || p[3 + i*2] != hi) { + wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i*2], p[3 + i*2]); + changed = TRUE; + } + } + + for (i = 0; i < 8; i++) { + rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi); + if (p[6 + i*2] != lo || p[7 + i*2] != hi) { + wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i*2], p[7 + i*2]); + changed = TRUE; + } + } + + return changed; +} + + +struct mtrr_state +{ + unsigned int num_var_ranges; + struct mtrr_var_range *var_ranges; + mtrr_type fixed_ranges[NUM_FIXED_RANGES]; + unsigned char enabled; + mtrr_type def_type; +}; + + +/* Grab all of the mtrr state for this cpu into *state. 
*/ +__initfunc(static void get_mtrr_state(struct mtrr_state *state)) +{ + unsigned int nvrs, i; + struct mtrr_var_range *vrs; + unsigned long lo, dummy; + + nvrs = state->num_var_ranges = get_num_var_ranges(); + vrs = state->var_ranges + = kmalloc(nvrs * sizeof(struct mtrr_var_range), GFP_KERNEL); + if (vrs == NULL) + nvrs = state->num_var_ranges = 0; + + for (i = 0; i < nvrs; i++) + get_mtrr_var_range(i, &vrs[i]); + + get_fixed_ranges(state->fixed_ranges); + + rdmsr(MTRRdefType_MSR, lo, dummy); + state->def_type = (lo & 0xff); + state->enabled = (lo & 0xc00) >> 10; +} /* End Function get_mtrr_state */ + + +/* Free resources associated with a struct mtrr_state */ +__initfunc(static void finalize_mtrr_state(struct mtrr_state *state)) +{ + if (state->var_ranges) kfree (state->var_ranges); +} /* End Function finalize_mtrr_state */ + + +__initfunc(static unsigned long set_mtrr_state (struct mtrr_state *state, + struct set_mtrr_context *ctxt)) +/* [SUMMARY] Set the MTRR state for this CPU. + The MTRR state information to read. + Some relevant CPU context. + [NOTE] The CPU must already be in a safe state for MTRR changes. + [RETURNS] 0 if no changes made, else a mask indication what was changed. +*/ +{ + unsigned int i; + unsigned long change_mask = 0; + + for (i = 0; i < state->num_var_ranges; i++) + if (set_mtrr_var_range_testing(i, &state->var_ranges[i])) + change_mask |= MTRR_CHANGE_MASK_VARIABLE; + + if (set_fixed_ranges_testing(state->fixed_ranges)) + change_mask |= MTRR_CHANGE_MASK_FIXED; + + /* set_mtrr_restore restores the old value of MTRRdefType, + so to set it we fiddle with the saved value. 
*/ + if ((ctxt->deftype_lo & 0xff) != state->def_type + || ((ctxt->deftype_lo & 0xc00) >> 10) != state->enabled) + { + ctxt->deftype_lo |= (state->def_type | state->enabled << 10); + change_mask |= MTRR_CHANGE_MASK_DEFTYPE; + } + + return change_mask; +} /* End Function set_mtrr_state */ + + +static atomic_t undone_count; +static void (*handler_func) (struct set_mtrr_context *ctxt, void *info); +static void *handler_info; +static volatile int wait_barrier_execute = FALSE; +static volatile int wait_barrier_cache_enable = FALSE; + +static void sync_handler (void) +/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. + [RETURNS] Nothing. +*/ +{ + struct set_mtrr_context ctxt; + + set_mtrr_prepare (&ctxt); + /* Notify master CPU that I'm at the barrier and then wait */ + atomic_dec (&undone_count); + while (wait_barrier_execute) barrier (); + /* The master has cleared me to execute */ + (*handler_func) (&ctxt, handler_info); + /* Notify master CPU that I've executed the function */ + atomic_dec (&undone_count); + /* Wait for master to clear me to enable cache and return */ + while (wait_barrier_cache_enable) barrier (); + set_mtrr_done (&ctxt); +} /* End Function sync_handler */ + +static void do_all_cpus (void (*handler) (struct set_mtrr_context *ctxt, + void *info), + void *info, int local) +/* [SUMMARY] Execute a function on all CPUs, with caches flushed and disabled. + [PURPOSE] This function will synchronise all CPUs, flush and disable caches + on all CPUs, then call a specified function. When the specified function + finishes on all CPUs, caches are enabled on all CPUs. + The function to execute. + An arbitrary information pointer which is passed to <>. + If TRUE <> is executed locally. + [RETURNS] Nothing. 
+*/ +{ + unsigned long timeout; + struct set_mtrr_context ctxt; + + mtrr_hook = sync_handler; + handler_func = handler; + handler_info = info; + wait_barrier_execute = TRUE; + wait_barrier_cache_enable = TRUE; + /* Send a message to all other CPUs and wait for them to enter the + barrier */ + atomic_set (&undone_count, smp_num_cpus - 1); + smp_message_pass (MSG_ALL_BUT_SELF, MSG_MTRR_CHANGE, 0, 0); + /* Wait for it to be done */ + timeout = jiffies + JIFFIE_TIMEOUT; + while ( (atomic_read (&undone_count) > 0) && (jiffies < timeout) ) + barrier (); + if (atomic_read (&undone_count) > 0) + { + panic ("mtrr: timed out waiting for other CPUs\n"); + } + mtrr_hook = NULL; + /* All other CPUs should be waiting for the barrier, with their caches + already flushed and disabled. Prepare for function completion + notification */ + atomic_set (&undone_count, smp_num_cpus - 1); + /* Flush and disable the local CPU's cache and release the barier, which + should cause the other CPUs to execute the function. Also execute it + locally if required */ + set_mtrr_prepare (&ctxt); + wait_barrier_execute = FALSE; + if (local) (*handler) (&ctxt, info); + /* Now wait for other CPUs to complete the function */ + while (atomic_read (&undone_count) > 0) barrier (); + /* Now all CPUs should have finished the function. 
Release the barrier to + allow them to re-enable their caches and return from their interrupt, + then enable the local cache and return */ + wait_barrier_cache_enable = FALSE; + set_mtrr_done (&ctxt); + handler_func = NULL; + handler_info = NULL; +} /* End Function do_all_cpus */ + + +struct set_mtrr_data +{ + unsigned long smp_base; + unsigned long smp_size; + unsigned int smp_reg; + mtrr_type smp_type; +}; + +static void set_mtrr_handler (struct set_mtrr_context *ctxt, void *info) +{ + struct set_mtrr_data *data = info; + + set_mtrr_up (data->smp_reg, data->smp_base, data->smp_size, data->smp_type, + FALSE); +} /* End Function set_mtrr_handler */ + +static void set_mtrr_smp (unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + struct set_mtrr_data data; + + data.smp_reg = reg; + data.smp_base = base; + data.smp_size = size; + data.smp_type = type; + do_all_cpus (set_mtrr_handler, &data, TRUE); +} /* End Function set_mtrr_smp */ + + +/* A warning that is common to the module and non-module cases. */ +/* Some BIOS's are fucked and don't set all MTRRs the same! */ +#ifdef MODULE +static void mtrr_state_warn (unsigned long mask) +#else +__initfunc(static void mtrr_state_warn (unsigned long mask)) +#endif +{ + if (!mask) return; + if (mask & MTRR_CHANGE_MASK_FIXED) + printk ("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); + if (mask & MTRR_CHANGE_MASK_VARIABLE) + printk ("mtrr: your CPUs had inconsistent variable MTRR settings\n"); + if (mask & MTRR_CHANGE_MASK_DEFTYPE) + printk ("mtrr: your CPUs had inconsistent MTRRdefType settings\n"); + printk ("mtrr: probably your BIOS does not setup all CPUs\n"); +} /* End Function mtrr_state_warn */ + +#ifdef MODULE +/* As a module, copy the MTRR state using an IPI handler. 
*/ + +static volatile unsigned long smp_changes_mask = 0; + +static void copy_mtrr_state_handler (struct set_mtrr_context *ctxt, void *info) +{ + unsigned long mask, count; + struct mtrr_state *smp_mtrr_state = info; + + mask = set_mtrr_state (smp_mtrr_state, ctxt); + /* Use the atomic bitops to update the global mask */ + for (count = 0; count < sizeof mask * 8; ++count) + { + if (mask & 0x01) set_bit (count, &smp_changes_mask); + mask >>= 1; + } +} /* End Function copy_mtrr_state_handler */ + +/* Copies the entire MTRR state of this cpu to all the others. */ +static void copy_mtrr_state (void) +{ + struct mtrr_state ms; + + get_mtrr_state (&ms); + do_all_cpus (copy_mtrr_state_handler, &ms, FALSE); + finalize_mtrr_state (&ms); + mtrr_state_warn (smp_changes_mask); +} /* End Function copy_mtrr_state */ + +#endif /* MODULE */ +#endif /* __SMP__ */ + +static char *attrib_to_str (int x) +{ + return (x <= 6) ? mtrr_strings[x] : "?"; +} /* End Function attrib_to_str */ + +static void init_table (void) +{ + int i, max; + + max = get_num_var_ranges (); + if ( ( usage_table = kmalloc (max * sizeof *usage_table, GFP_KERNEL) ) + == NULL ) + { + printk ("mtrr: could not allocate\n"); + return; + } + for (i = 0; i < max; i++) usage_table[i] = 1; +#ifdef CONFIG_PROC_FS + if ( ( ascii_buffer = kmalloc (max * LINE_SIZE, GFP_KERNEL) ) == NULL ) + { + printk ("mtrr: could not allocate\n"); + return; + } + ascii_buf_bytes = 0; + compute_ascii (); +#endif +} /* End Function init_table */ + +int mtrr_add (unsigned long base, unsigned long size, unsigned int type, + char increment) +/* [SUMMARY] Add an MTRR entry. + The starting (base) address of the region. + The size (in bytes) of the region. + The type of the new region. + If true and the region already exists, the usage count will be + incremented. + [RETURNS] The MTRR register on success, else a negative number indicating + the error code. + [NOTE] This routine uses a spinlock. 
+*/ +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize, last; + + if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return -ENODEV; + if ( (base & 0xfff) || (size & 0xfff) ) + { + printk ("mtrr: size and base must be multiples of 4kB\n"); + printk ("mtrr: size: %lx base: %lx\n", size, base); + return -EINVAL; + } + if (base + size < 0x100000) + { + printk ("mtrr: cannot set region below 1 MByte (0x%lx,0x%lx)\n", + base, size); + return -EINVAL; + } + /* Check upper bits of base and last are equal and lower bits are 0 for + base and 1 for last */ + last = base + size - 1; + for (lbase = base; !(lbase & 1) && (last & 1); + lbase = lbase >> 1, last = last >> 1); + if (lbase != last) + { + printk ("mtrr: base(0x%lx) is not aligned on a size(0x%lx) boundary\n", + base, size); + return -EINVAL; + } + if (type >= MTRR_NUM_TYPES) + { + printk ("mtrr: type: %u illegal\n", type); + return -EINVAL; + } + /* If the type is WC, check that this processor supports it */ + if ( (type == MTRR_TYPE_WRCOMB) && !have_wrcomb () ) + { + printk ("mtrr: your processor doesn't support write-combining\n"); + return -ENOSYS; + } + increment = increment ? 
1 : 0; + max = get_num_var_ranges (); + /* Search for existing MTRR */ + spin_lock (&main_lock); + for (i = 0; i < max; ++i) + { + get_mtrr (i, &lbase, &lsize, <ype); + if (base >= lbase + lsize) continue; + if ( (base < lbase) && (base + size <= lbase) ) continue; + /* At this point we know there is some kind of overlap/enclosure */ + if ( (base < lbase) || (base + size > lbase + lsize) ) + { + spin_unlock (&main_lock); + printk ("mtrr: 0x%lx,0x%lx overlaps existing 0x%lx,0x%lx\n", + base, size, lbase, lsize); + return -EINVAL; + } + if (ltype != type) + { + spin_unlock (&main_lock); + printk ( "mtrr: type missmatch for %lx,%lx old: %s new: %s\n", + base, size, attrib_to_str (ltype), attrib_to_str (type) ); + return -EINVAL; + } + if (increment) ++usage_table[i]; + compute_ascii (); + spin_unlock (&main_lock); + return i; + } + /* Search for an empty MTRR */ + for (i = 0; i < max; ++i) + { + get_mtrr (i, &lbase, &lsize, <ype); + if (lsize > 0) continue; + set_mtrr (i, base, size, type); + usage_table[i] = 1; + compute_ascii (); + spin_unlock (&main_lock); + return i; + } + spin_unlock (&main_lock); + printk ("mtrr: no more MTRRs available\n"); + return -ENOSPC; +} /* End Function mtrr_add */ + +int mtrr_del (int reg, unsigned long base, unsigned long size) +/* [SUMMARY] Delete MTRR/decrement usage count. + The register. If this is less than 0 then <> and <> must + be supplied. + The base address of the region. This is ignored if <> is >= 0. + The size of the region. This is ignored if <> is >= 0. + [RETURNS] The register on success, else a negative number indicating + the error code. + [NOTE] This routine uses a spinlock. 
+*/ +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize; + + if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return -ENODEV; + max = get_num_var_ranges (); + spin_lock (&main_lock); + if (reg < 0) + { + /* Search for existing MTRR */ + for (i = 0; i < max; ++i) + { + get_mtrr (i, &lbase, &lsize, <ype); + if ( (lbase == base) && (lsize == size) ) + { + reg = i; + break; + } + } + if (reg < 0) + { + spin_unlock (&main_lock); + printk ("mtrr: no MTRR for %lx,%lx found\n", base, size); + return -EINVAL; + } + } + if (reg >= max) + { + spin_unlock (&main_lock); + printk ("mtrr: register: %d too big\n", reg); + return -EINVAL; + } + get_mtrr (reg, &lbase, &lsize, <ype); + if (lsize < 1) + { + spin_unlock (&main_lock); + printk ("mtrr: MTRR %d not used\n", reg); + return -EINVAL; + } + if (usage_table[reg] < 1) + { + spin_unlock (&main_lock); + printk ("mtrr: reg: %d has count=0\n", reg); + return -EINVAL; + } + if (--usage_table[reg] < 1) set_mtrr (reg, 0, 0, 0); + compute_ascii (); + spin_unlock (&main_lock); + return reg; +} /* End Function mtrr_del */ + +#ifdef CONFIG_PROC_FS + +static int mtrr_file_add (unsigned long base, unsigned long size, + unsigned int type, char increment, struct file *file) +{ + int reg, max; + unsigned int *fcount = file->private_data; + + max = get_num_var_ranges (); + if (fcount == NULL) + { + if ( ( fcount = kmalloc (max * sizeof *fcount, GFP_KERNEL) ) == NULL ) + { + printk ("mtrr: could not allocate\n"); + return -ENOMEM; + } + memset (fcount, 0, max * sizeof *fcount); + file->private_data = fcount; + } + reg = mtrr_add (base, size, type, 1); + if (reg >= 0) ++fcount[reg]; + return reg; +} /* End Function mtrr_file_add */ + +static int mtrr_file_del (unsigned long base, unsigned long size, + struct file *file) +{ + int reg; + unsigned int *fcount = file->private_data; + + reg = mtrr_del (-1, base, size); + if (reg < 0) return reg; + if (fcount != NULL) --fcount[reg]; + return reg; +} /* End Function mtrr_file_del 
*/ + +static ssize_t mtrr_read (struct file *file, char *buf, size_t len, + loff_t *ppos) +{ + if (*ppos >= ascii_buf_bytes) return 0; + if (*ppos + len > ascii_buf_bytes) len = ascii_buf_bytes - *ppos; + if ( copy_to_user (buf, ascii_buffer + *ppos, len) ) return -EFAULT; + *ppos += len; + return len; +} /* End Function mtrr_read */ + +static ssize_t mtrr_write (struct file *file, const char *buf, size_t len, + loff_t *ppos) +/* Format of control line: + "base=%lx size=%lx type=%s" OR: + "disable=%d" +*/ +{ + int i, err; + unsigned long reg, base, size; + char *ptr; + char line[LINE_SIZE]; + + if ( !suser () ) return -EPERM; + /* Can't seek (pwrite) on this device */ + if (ppos != &file->f_pos) return -ESPIPE; + memset (line, 0, LINE_SIZE); + if (len > LINE_SIZE) len = LINE_SIZE; + if ( copy_from_user (line, buf, len - 1) ) return -EFAULT; + ptr = line + strlen (line) - 1; + if (*ptr == '\n') *ptr = '\0'; + if ( !strncmp (line, "disable=", 8) ) + { + reg = simple_strtoul (line + 8, &ptr, 0); + err = mtrr_del (reg, 0, 0); + if (err < 0) return err; + return len; + } + if ( strncmp (line, "base=", 5) ) + { + printk ("mtrr: no \"base=\" in line: \"%s\"\n", line); + return -EINVAL; + } + base = simple_strtoul (line + 5, &ptr, 0); + for (; isspace (*ptr); ++ptr); + if ( strncmp (ptr, "size=", 5) ) + { + printk ("mtrr: no \"size=\" in line: \"%s\"\n", line); + return -EINVAL; + } + size = simple_strtoul (ptr + 5, &ptr, 0); + for (; isspace (*ptr); ++ptr); + if ( strncmp (ptr, "type=", 5) ) + { + printk ("mtrr: no \"type=\" in line: \"%s\"\n", line); + return -EINVAL; + } + ptr += 5; + for (; isspace (*ptr); ++ptr); + for (i = 0; i < MTRR_NUM_TYPES; ++i) + { + if ( strcmp (ptr, mtrr_strings[i]) ) continue; + err = mtrr_add (base, size, i, 1); + if (err < 0) return err; + return len; + } + printk ("mtrr: illegal type: \"%s\"\n", ptr); + return -EINVAL; +} /* End Function mtrr_write */ + +static int mtrr_ioctl (struct inode *inode, struct file *file, + unsigned int cmd, 
unsigned long arg) +{ + int err; + mtrr_type type; + struct mtrr_sentry sentry; + struct mtrr_gentry gentry; + + switch (cmd) + { + default: + return -ENOIOCTLCMD; + case MTRRIOC_ADD_ENTRY: + if ( !suser () ) return -EPERM; + if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) ) + return -EFAULT; + err = mtrr_file_add (sentry.base, sentry.size, sentry.type, 1, file); + if (err < 0) return err; + break; + case MTRRIOC_SET_ENTRY: + if ( !suser () ) return -EPERM; + if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) ) + return -EFAULT; + err = mtrr_add (sentry.base, sentry.size, sentry.type, 0); + if (err < 0) return err; + break; + case MTRRIOC_DEL_ENTRY: + if ( !suser () ) return -EPERM; + if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) ) + return -EFAULT; + err = mtrr_file_del (sentry.base, sentry.size, file); + if (err < 0) return err; + break; + case MTRRIOC_GET_ENTRY: + if ( copy_from_user (&gentry, (void *) arg, sizeof gentry) ) + return -EFAULT; + if ( gentry.regnum >= get_num_var_ranges () ) return -EINVAL; + get_mtrr (gentry.regnum, &gentry.base, &gentry.size, &type); + gentry.type = type; + if ( copy_to_user ( (void *) arg, &gentry, sizeof gentry) ) + return -EFAULT; + break; + } + return 0; +} /* End Function mtrr_ioctl */ + +static int mtrr_open (struct inode *ino, struct file *filep) +{ + MOD_INC_USE_COUNT; + return 0; +} /* End Function mtrr_open */ + +static int mtrr_close (struct inode *ino, struct file *file) +{ + int i, max; + unsigned int *fcount = file->private_data; + + MOD_DEC_USE_COUNT; + if (fcount == NULL) return 0; + max = get_num_var_ranges (); + for (i = 0; i < max; ++i) + { + while (fcount[i] > 0) + { + if (mtrr_del (i, 0, 0) < 0) printk ("mtrr: reg %d not used\n", i); + --fcount[i]; + } + } + kfree (fcount); + file->private_data = NULL; + return 0; +} /* End Function mtrr_close */ + +static struct file_operations mtrr_fops = +{ + NULL, /* Seek */ + mtrr_read, /* Read */ + mtrr_write, /* Write */ + NULL, /* 
Readdir */ + NULL, /* Poll */ + mtrr_ioctl, /* IOctl */ + NULL, /* MMAP */ + mtrr_open, /* Open */ + mtrr_close, /* Release */ + NULL, /* Fsync */ + NULL, /* Fasync */ + NULL, /* CheckMediaChange */ + NULL, /* Revalidate */ + NULL, /* Lock */ +}; + +static struct inode_operations proc_mtrr_inode_operations = { + &mtrr_fops, /* default property file-ops */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static struct proc_dir_entry proc_root_mtrr = { + PROC_MTRR, 4, "mtrr", + S_IFREG | S_IWUSR | S_IRUGO, 1, 0, 0, + 0, &proc_mtrr_inode_operations +}; + +static void compute_ascii (void) +{ + char factor; + int i, max; + mtrr_type type; + unsigned long base, size; + + ascii_buf_bytes = 0; + max = get_num_var_ranges (); + for (i = 0; i < max; i++) + { + get_mtrr (i, &base, &size, &type); + if (size < 1) usage_table[i] = 0; + else + { + if (size < 0x100000) + { + /* 1MB */ + factor = 'k'; + size >>= 10; + } + else + { + factor = 'M'; + size >>= 20; + } + sprintf + (ascii_buffer + ascii_buf_bytes, + "reg%02i: base=0x%08lx (%4liMB), size=%4li%cB: %s, count=%d\n", + i, base, base>>20, size, factor, + attrib_to_str (type), usage_table[i]); + ascii_buf_bytes += strlen (ascii_buffer + ascii_buf_bytes); + } + } + proc_root_mtrr.size = ascii_buf_bytes; +} /* End Function compute_ascii */ + +#endif /* CONFIG_PROC_FS */ + +EXPORT_SYMBOL(mtrr_add); +EXPORT_SYMBOL(mtrr_del); + +#if defined(__SMP__) && !defined(MODULE) + +static volatile unsigned long smp_changes_mask __initdata = 0; +static struct mtrr_state smp_mtrr_state __initdata = {0, 0}; + +__initfunc(void mtrr_init_boot_cpu (void)) +{ + if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return; + printk("mtrr: v%s 
Richard Gooch (rgooch@atnf.csiro.au)\n", MTRR_VERSION); + + get_mtrr_state (&smp_mtrr_state); +} /* End Function mtrr_init_boot_cpu */ + +__initfunc(void mtrr_init_secondary_cpu (void)) +{ + unsigned long mask, count; + struct set_mtrr_context ctxt; + + if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return; + /* Note that this is not ideal, since the cache is only flushed/disabled + for this CPU while the MTRRs are changed, but changing this requires + more invasive changes to the way the kernel boots */ + set_mtrr_prepare (&ctxt); + mask = set_mtrr_state (&smp_mtrr_state, &ctxt); + set_mtrr_done (&ctxt); + /* Use the atomic bitops to update the global mask */ + for (count = 0; count < sizeof mask * 8; ++count) + { + if (mask & 0x01) set_bit (count, &smp_changes_mask); + mask >>= 1; + } +} /* End Function mtrr_init_secondary_cpu */ + +#endif + +#ifdef MODULE +int init_module (void) +#else +__initfunc(int mtrr_init(void)) +#endif +{ +# if !defined(__SMP__) || defined(MODULE) + if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return 0; + printk("mtrr: v%s Richard Gooch (rgooch@atnf.csiro.au)\n", MTRR_VERSION); +#endif + +# ifdef __SMP__ +# ifdef MODULE + copy_mtrr_state (); +# else /* MODULE */ + finalize_mtrr_state (&smp_mtrr_state); + mtrr_state_warn (smp_changes_mask); +# endif /* MODULE */ +# endif /* __SMP__ */ + +# ifdef CONFIG_PROC_FS + proc_register (&proc_root, &proc_root_mtrr); +# endif + + init_table (); + return 0; +} + +#ifdef MODULE +void cleanup_module (void) +{ + if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return; +# ifdef CONFIG_PROC_FS + proc_unregister (&proc_root, PROC_MTRR); +# endif +# ifdef __SMP__ + mtrr_hook = NULL; +# endif +} +#endif diff -u --recursive --new-file v2.1.98/linux/arch/i386/kernel/smp.c linux/arch/i386/kernel/smp.c --- v2.1.98/linux/arch/i386/kernel/smp.c Mon Apr 6 17:40:59 1998 +++ linux/arch/i386/kernel/smp.c Wed Apr 29 22:46:59 1998 @@ -28,6 +28,7 @@ * Alan Cox : Added EBDA scanning */ 
+#include #include #include #include @@ -47,6 +48,10 @@ #include #include +#ifdef CONFIG_MTRR +# include +#endif + #define __KERNEL_SYSCALLS__ #include @@ -150,6 +155,7 @@ extern int mpc_default_type; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, }; int mp_current_pci_id = 0; +unsigned long mp_lapic_addr = 0; /* #define SMP_DEBUG */ @@ -270,9 +276,8 @@ printk("APIC at: 0x%lX\n",mpc->mpc_lapic); - /* check the local APIC address */ - if ((char *)phys_to_virt((unsigned long)mpc->mpc_lapic) != APIC_BASE) - panic("unexpected APIC address"); + /* save the local APIC address, it might be non-default */ + mp_lapic_addr = mpc->mpc_lapic; /* * Now process the configuration blocks. @@ -453,7 +458,7 @@ */ cfg=pg0[0]; - pg0[0] = ((unsigned long)APIC_BASE | 7); + pg0[0] = (mp_lapic_addr | 7); local_flush_tlb(); boot_cpu_id = GET_APIC_ID(*((volatile unsigned long *) APIC_ID)); @@ -669,6 +674,10 @@ */ __initfunc(int start_secondary(void *unused)) { +#ifdef CONFIG_MTRR + /* Must be done before calibration delay is computed */ + mtrr_init_secondary_cpu (); +#endif smp_callin(); while (!smp_commenced) barrier(); @@ -729,7 +738,7 @@ /* start_eip had better be page-aligned! */ start_eip = setup_trampoline(); - printk("Booting processor %d eip %lx: ", i, start_eip); /* So we see what's up */ + printk("Booting processor %d eip %lx\n", i, start_eip); /* So we see what's up */ stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); /* @@ -908,6 +917,10 @@ int i; unsigned long cfg; +#ifdef CONFIG_MTRR + /* Must be done before other processors booted */ + mtrr_init_boot_cpu (); +#endif /* * Initialize the logical to physical cpu number mapping * and the per-CPU profiling counter/multiplier @@ -940,7 +953,7 @@ { printk(KERN_NOTICE "SMP motherboard not detected. 
Using dummy APIC emulation.\n"); io_apic_irqs = 0; - return; + goto smp_done; } /* @@ -1099,6 +1112,12 @@ * go and set it up: */ setup_IO_APIC(); + +smp_done: +#ifdef CONFIG_MTRR + /* Must be done after other processors booted */ + mtrr_init (); +#endif } @@ -1189,6 +1208,10 @@ irq = 0x40; break; + case MSG_MTRR_CHANGE: + irq = 0x50; + break; + default: printk("Unknown SMP message %d\n", msg); return; @@ -1485,6 +1508,14 @@ if (cpu_data[smp_processor_id()].hlt_works_ok) for(;;) __asm__("hlt"); for (;;) ; +} + +void (*mtrr_hook) (void) = NULL; + +asmlinkage void smp_mtrr_interrupt(void) +{ + ack_APIC_irq (); + if (mtrr_hook) (*mtrr_hook) (); } /* diff -u --recursive --new-file v2.1.98/linux/arch/i386/mm/init.c linux/arch/i386/mm/init.c --- v2.1.98/linux/arch/i386/mm/init.c Wed Apr 8 19:36:25 1998 +++ linux/arch/i386/mm/init.c Tue Apr 28 13:04:00 1998 @@ -90,7 +90,6 @@ shared += atomic_read(&mem_map[i].count) - 1; } printk("%d pages of RAM\n",total); - printk("%d free pages\n",free); printk("%d reserved pages\n",reserved); printk("%d pages shared\n",shared); printk("%d pages swap cached\n",cached); @@ -257,6 +256,7 @@ } #ifdef __SMP__ { + extern unsigned long mp_lapic_addr; pte_t pte; unsigned long apic_area = (unsigned long)APIC_BASE; @@ -267,10 +267,13 @@ if (smp_found_config) { /* - * Map the local APIC to FEE00000. + * Map the local APIC to FEE00000. (it's only the default + * value, thanks to Steve Hsieh for finding this out. 
We + * now save the real local-APIC physical address in smp_scan(), + * and use it here) */ pg_table = pte_offset((pmd_t *)pg_dir, apic_area); - pte = mk_pte(__va(apic_area), PAGE_KERNEL); + pte = mk_pte(__va(mp_lapic_addr), PAGE_KERNEL); set_pte(pg_table, pte); /* diff -u --recursive --new-file v2.1.98/linux/arch/ppc/config.in linux/arch/ppc/config.in --- v2.1.98/linux/arch/ppc/config.in Thu Apr 23 20:21:28 1998 +++ linux/arch/ppc/config.in Tue Apr 28 22:41:33 1998 @@ -48,7 +48,10 @@ define_bool CONFIG_PCI y fi if [ "$CONFIG_PREP" = "y" ]; then - bool 'PCI bridge optimization' CONFIG_PCI_OPTIMIZE + bool 'PCI quirks' CONFIG_PCI_QUIRKS + if [ "$CONFIG_PCI_QUIRKS" = "y" ]; then + bool ' PCI bridge optimization' CONFIG_PCI_OPTIMIZE + fi fi bool 'Backward-compatible /proc/pci' CONFIG_PCI_OLD_PROC bool 'Networking support' CONFIG_NET diff -u --recursive --new-file v2.1.98/linux/arch/ppc/prep_defconfig linux/arch/ppc/prep_defconfig --- v2.1.98/linux/arch/ppc/prep_defconfig Thu Apr 23 20:21:29 1998 +++ linux/arch/ppc/prep_defconfig Tue Apr 28 22:41:33 1998 @@ -23,6 +23,7 @@ CONFIG_MODVERSIONS=y CONFIG_KERNELD=y CONFIG_PCI=y +# CONFIG_PCI_QUIRKS is not set # CONFIG_PCI_OPTIMIZE is not set CONFIG_PCI_OLD_PROC=y CONFIG_NET=y diff -u --recursive --new-file v2.1.98/linux/arch/sparc/mm/sun4c.c linux/arch/sparc/mm/sun4c.c --- v2.1.98/linux/arch/sparc/mm/sun4c.c Thu Apr 23 20:21:31 1998 +++ linux/arch/sparc/mm/sun4c.c Tue Apr 28 12:57:57 1998 @@ -8,6 +8,7 @@ * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ +#include #include #include #include diff -u --recursive --new-file v2.1.98/linux/drivers/block/floppy.c linux/drivers/block/floppy.c --- v2.1.98/linux/drivers/block/floppy.c Tue Mar 17 22:18:14 1998 +++ linux/drivers/block/floppy.c Fri Apr 24 12:29:10 1998 @@ -4211,7 +4211,6 @@ if (FDCS->address != -1) fd_outb(FDCS->dor, FD_DOR); fdc = 0; - fd_enable_irq(); return 0; } @@ -4235,7 +4234,6 @@ INT_ON; fd_disable_dma(); fd_free_dma(); - fd_disable_irq(); 
fd_free_irq(); set_dor(0, ~0, 8); diff -u --recursive --new-file v2.1.98/linux/drivers/block/ide.c linux/drivers/block/ide.c --- v2.1.98/linux/drivers/block/ide.c Sat Apr 25 18:13:11 1998 +++ linux/drivers/block/ide.c Tue Apr 28 12:57:57 1998 @@ -1125,9 +1125,6 @@ static void do_hwgroup_request (ide_hwgroup_t *hwgroup) { if (hwgroup->handler == NULL) { - ide_hwif_t *hgif = hwgroup->hwif; - ide_hwif_t *hwif = hgif; - del_timer(&hwgroup->timer); ide_get_lock(&ide_lock, ide_intr, hwgroup); hwgroup->active = 1; diff -u --recursive --new-file v2.1.98/linux/drivers/block/rd.c linux/drivers/block/rd.c --- v2.1.98/linux/drivers/block/rd.c Tue Mar 17 22:18:14 1998 +++ linux/drivers/block/rd.c Thu Apr 30 09:38:47 1998 @@ -328,7 +328,7 @@ #ifdef RD_LOADER /* - * This routine tries to a ramdisk image to load, and returns the + * This routine tries to find a ramdisk image to load, and returns the * number of blocks to read for a non-compressed image, 0 if the image * is a compressed image, and -1 if an image with the right magic * numbers could not be found. @@ -503,15 +503,21 @@ if (blk_size[MAJOR(device)]) devblocks = blk_size[MAJOR(device)][MINOR(device)]; +#ifdef CONFIG_BLK_DEV_INITRD + if (MAJOR(device) == MAJOR_NR && MINOR(device) == INITRD_MINOR) + devblocks = nblocks; +#endif + if (devblocks == 0) { printk(KERN_ERR "RAMDISK: could not determine device size\n"); goto done; } - printk(KERN_NOTICE "RAMDISK: Loading %d blocks [%d disk(s)] into ram disk... ", nblocks, nblocks/devblocks+1); + printk(KERN_NOTICE "RAMDISK: Loading %d blocks [%d disk%s] into ram disk... ", + nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? 
"s" : ""); for (i=0; i < nblocks; i++) { if (i && (i % devblocks == 0)) { - printk("done.\n"); + printk("done disk #%d.\n", i/devblocks); rotate = 0; invalidate_buffers(device); if (infile.f_op->release) diff -u --recursive --new-file v2.1.98/linux/drivers/macintosh/imstt.c linux/drivers/macintosh/imstt.c --- v2.1.98/linux/drivers/macintosh/imstt.c Thu Apr 23 20:21:33 1998 +++ linux/drivers/macintosh/imstt.c Tue Apr 28 12:57:57 1998 @@ -11,8 +11,6 @@ */ #include -#include - #include #include #include diff -u --recursive --new-file v2.1.98/linux/drivers/net/arcnet.c linux/drivers/net/arcnet.c --- v2.1.98/linux/drivers/net/arcnet.c Thu Feb 12 20:56:07 1998 +++ linux/drivers/net/arcnet.c Fri Apr 24 13:20:33 1998 @@ -18,6 +18,10 @@ ********************** + v3.01 (98/04/17) + - Interrupt handler now also checks dev->[se]dev are non-NULL + to avoid crashes in interrupts during card init. [dw] + v3.00 (97/11/09) - Minor cleanup of debugging messages. [mj] @@ -41,10 +45,10 @@ v2.80 ALPHA (97/08/01) - Split source into multiple files; generic arcnet support and - individual chipset drivers. + individual chipset drivers. - v2.61 ALPHA (97/07/30) by David Woodhouse (dwmw2@cam.ac.uk) for - Nortel (Northern Telecom). + v2.61 ALPHA (97/07/30) by David Woodhouse (Dave@imladris.demon.co.uk) + for Nortel (Northern Telecom). - Added support for IO-mapped modes and for SMC COM20020 chipset. - Fixed (avoided) race condition in send_packet routines which was discovered when the buffer copy routines got slow (?). @@ -170,7 +174,7 @@ */ static const char *version = - "arcnet.c: v3.00 97/11/09 Avery Pennarun et al.\n"; + "arcnet.c: v3.01 98/04/24 Avery Pennarun et al.\n"; #include #include @@ -956,20 +960,24 @@ return; /* don't even try. */ } #ifdef CONFIG_ARCNET_1051 - lp->sdev->interrupt=1; + if (lp->sdev) + lp->sdev->interrupt=1; #endif #ifdef CONFIG_ARCNET_ETH - lp->edev->interrupt=1; + if (lp->edev) + lp->edev->interrupt=1; #endif /* Call the "real" interrupt handler. 
*/ (*lp->inthandler)(dev); #ifdef CONFIG_ARCNET_ETH - lp->edev->interrupt=0; + if (lp->edev) + lp->edev->interrupt=0; #endif #ifdef CONFIG_ARCNET_1051 - lp->sdev->interrupt=0; + if (lp->sdev) + lp->sdev->interrupt=0; #endif if (!test_and_clear_bit(0, (int *)&dev->interrupt)) BUGMSG(D_NORMAL, "Someone cleared our dev->interrupt flag!\n"); diff -u --recursive --new-file v2.1.98/linux/drivers/net/smc-ultra32.c linux/drivers/net/smc-ultra32.c --- v2.1.98/linux/drivers/net/smc-ultra32.c Tue Mar 10 10:03:32 1998 +++ linux/drivers/net/smc-ultra32.c Thu Apr 30 09:33:28 1998 @@ -239,8 +239,9 @@ static int ultra32_open(struct device *dev) { int ioaddr = dev->base_addr - ULTRA32_NIC_OFFSET; /* ASIC addr */ + int irq_flags = (inb(ioaddr + ULTRA32_CFG5) & 0x08) ? 0 : SA_SHIRQ; - if (request_irq(dev->irq, ei_interrupt, 0, ei_status.name, dev)) + if (request_irq(dev->irq, ei_interrupt, irq_flags, ei_status.name, dev)) return -EAGAIN; outb(ULTRA32_MEMENB, ioaddr); /* Enable Shared Memory. */ diff -u --recursive --new-file v2.1.98/linux/drivers/pci/Makefile linux/drivers/pci/Makefile --- v2.1.98/linux/drivers/pci/Makefile Mon Apr 6 17:41:00 1998 +++ linux/drivers/pci/Makefile Tue Apr 28 22:41:33 1998 @@ -28,7 +28,7 @@ endif endif -ifdef CONFIG_PCI_OPTIMIZE +ifdef CONFIG_PCI_QUIRKS L_OBJS += quirks.o endif diff -u --recursive --new-file v2.1.98/linux/drivers/pci/pci.c linux/drivers/pci/pci.c --- v2.1.98/linux/drivers/pci/pci.c Sat Apr 25 18:13:11 1998 +++ linux/drivers/pci/pci.c Tue Apr 28 22:41:33 1998 @@ -319,7 +319,7 @@ /* give BIOS a chance to apply platform specific fixes: */ pcibios_fixup(); -#ifdef CONFIG_PCI_OPTIMIZE +#ifdef CONFIG_PCI_QUIRKS pci_quirks_init(); #endif diff -u --recursive --new-file v2.1.98/linux/drivers/pci/quirks.c linux/drivers/pci/quirks.c --- v2.1.98/linux/drivers/pci/quirks.c Mon Apr 6 17:41:00 1998 +++ linux/drivers/pci/quirks.c Wed Apr 29 22:46:59 1998 @@ -11,6 +11,7 @@ * the bridge optimization, but others might appear later. 
*/ +#include #include #include #include @@ -19,6 +20,8 @@ #undef DEBUG +#ifdef CONFIG_PCI_OPTIMIZE + /* * The PCI Bridge Optimization -- Some BIOS'es are too lazy * and are unable to turn on several features which can burst @@ -103,35 +106,87 @@ } } +#endif + + +/* Deal with broken BIOS'es that neglect to enable passive release, + which can cause problems in combination with the 82441FX/PPro MTRRs */ +__initfunc(static void quirk_passive_release(struct pci_dev *dev, int arg)) +{ + struct pci_dev *piix3; + unsigned char dlc; + + /* We have to make sure a particular bit is set in the PIIX3 + ISA bridge, so we have to go out and find it. */ + for (piix3 = pci_devices; ; piix3 = piix3->next) { + if (!piix3) + return; + + if (piix3->vendor == PCI_VENDOR_ID_INTEL + && piix3->device == PCI_DEVICE_ID_INTEL_82371SB_0) + break; + } + + pcibios_read_config_byte(piix3->bus->number, piix3->devfn, 0x82, &dlc); + + if (!(dlc & 1<<1)) { + printk("PIIX3: Enabling Passive Release\n"); + dlc |= 1<<1; + pcibios_write_config_byte(piix3->bus->number, piix3->devfn, + 0x82, dlc); + } +} + + +typedef void (*quirk_handler)(struct pci_dev *, int); + /* - * Table of quirk handler functions + * Mapping from quirk handler functions to names. 
*/ -#define Q_BRIDGE 0 - -struct quirk_type { - void (*handler)(struct pci_dev *, int); +struct quirk_name { + quirk_handler handler; char *name; }; -static struct quirk_type quirk_types[] __initdata = { +static struct quirk_name quirk_names[] __initdata = { +#ifdef CONFIG_PCI_OPTIMIZE { quirk_bridge, "Bridge optimization" }, +#endif + { quirk_passive_release, "Passive release enable" }, }; + +static inline char *get_quirk_name(quirk_handler handler) +{ + int i; + + for (i = 0; i < sizeof(quirk_names)/sizeof(quirk_names[0]); i++) + if (handler == quirk_names[i].handler) + return quirk_names[i].name; + + return NULL; +} + + /* * Mapping from PCI vendor/device ID pairs to quirk function types and arguments */ struct quirk_info { unsigned short vendor, device; - unsigned short quirk, arg; + quirk_handler handler; + unsigned short arg; }; static struct quirk_info quirk_list[] __initdata = { - { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_BRD, Q_BRIDGE, 0x00 }, - { PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8891A, Q_BRIDGE, 0x01 }, - { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82424, Q_BRIDGE, 0x00 }, - { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82434, Q_BRIDGE, 0x00 } +#ifdef CONFIG_PCI_OPTIMIZE + { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_BRD, quirk_bridge, 0x00 }, + { PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8891A, quirk_bridge, 0x01 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82424, quirk_bridge, 0x00 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82434, quirk_bridge, 0x00 }, +#endif + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_passive_release, 0x00 }, }; __initfunc(void pci_quirks_init(void)) @@ -146,11 +201,10 @@ for(i=0; i<sizeof(quirk_list)/sizeof(quirk_list[0]); i++) { struct quirk_info *q = quirk_list + i; if (q->vendor == d->vendor && q->device == d->device) { - struct quirk_type *t = quirk_types + q->quirk; printk("PCI: %02x:%02x [%04x/%04x]: %s (%02x)\n", d->bus->number, d->devfn, d->vendor, d->device, - t->name, q->arg); - t->handler(d, q->arg); + get_quirk_name(q->handler), q->arg); + q->handler(d, q->arg); } } } diff -u --recursive --new-file 
v2.1.98/linux/drivers/scsi/53c7,8xx.c linux/drivers/scsi/53c7,8xx.c --- v2.1.98/linux/drivers/scsi/53c7,8xx.c Thu Apr 23 20:21:34 1998 +++ linux/drivers/scsi/53c7,8xx.c Wed Apr 29 17:39:25 1998 @@ -1441,9 +1441,8 @@ &command)) || (error = pcibios_read_config_byte (bus, device_fn, PCI_CLASS_REVISION, &revision))) { - printk ("scsi-ncr53c7,8xx : error %s not initializing due to error reading configuration space\n" - " perhaps you specified an incorrect PCI bus, device, or function.\n" - , pcibios_strerror(error)); + printk ("scsi-ncr53c7,8xx : error %d not initializing due to error reading configuration space\n" + " perhaps you specified an incorrect PCI bus, device, or function.\n", error); return -1; } io_port = pdev->base_address[0]; diff -u --recursive --new-file v2.1.98/linux/drivers/scsi/BusLogic.c linux/drivers/scsi/BusLogic.c --- v2.1.98/linux/drivers/scsi/BusLogic.c Sat Apr 25 18:13:11 1998 +++ linux/drivers/scsi/BusLogic.c Fri Apr 24 18:23:03 1998 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -1204,10 +1205,13 @@ { BusLogic_AnnounceDriver(HostAdapter); if (HostAdapter->HostAdapterBusType == BusLogic_PCI_Bus) - BusLogic_Error("While configuring BusLogic PCI Host Adapter at\n" - "Bus %d Device %d I/O Address 0x%X PCI Address 0x%X:\n", - HostAdapter, HostAdapter->Bus, HostAdapter->Device, - HostAdapter->IO_Address, HostAdapter->PCI_Address); + { + BusLogic_Error("While configuring BusLogic PCI Host Adapter at\n", + HostAdapter); + BusLogic_Error("Bus %d Device %d I/O Address 0x%X PCI Address 0x%X:\n", + HostAdapter, HostAdapter->Bus, HostAdapter->Device, + HostAdapter->IO_Address, HostAdapter->PCI_Address); + } else BusLogic_Error("While configuring BusLogic Host Adapter at " "I/O Address 0x%X:\n", HostAdapter, HostAdapter->IO_Address); diff -u --recursive --new-file v2.1.98/linux/drivers/scsi/BusLogic.h linux/drivers/scsi/BusLogic.h --- v2.1.98/linux/drivers/scsi/BusLogic.h Thu Apr 23 20:21:34 1998 +++ 
linux/drivers/scsi/BusLogic.h Thu Apr 30 12:52:35 1998 @@ -1532,7 +1532,6 @@ void BusLogic_AcquireHostAdapterLockIH(BusLogic_HostAdapter_T *HostAdapter, ProcessorFlags_T *ProcessorFlags) { - extern spinlock_t io_request_lock; spin_lock_irqsave(&io_request_lock, *ProcessorFlags); } @@ -1546,7 +1545,6 @@ void BusLogic_ReleaseHostAdapterLockIH(BusLogic_HostAdapter_T *HostAdapter, ProcessorFlags_T *ProcessorFlags) { - extern spinlock_t io_request_lock; spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); } diff -u --recursive --new-file v2.1.98/linux/drivers/scsi/fdomain.c linux/drivers/scsi/fdomain.c --- v2.1.98/linux/drivers/scsi/fdomain.c Thu Apr 23 20:21:35 1998 +++ linux/drivers/scsi/fdomain.c Wed Apr 29 17:39:25 1998 @@ -4,7 +4,7 @@ * Author: Rickard E. Faith, faith@cs.unc.edu * Copyright 1992, 1993, 1994, 1995, 1996 Rickard E. Faith * - * $Id: fdomain.c,v 5.45 1996/10/02 15:13:06 root Exp $ + * Version 5.46 (23-04-1998) * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -20,8 +20,6 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. - * PCI detection rewritten by Martin Mares - ************************************************************************** SUMMARY: @@ -108,6 +106,7 @@ 1.3.85 5.41 4 Apr 1996 2.0.12 5.44 8 Aug 1996 Use ID 7 for all PCI cards 2.1.1 5.45 2 Oct 1996 Update ROM accesses for 2.1.x + 2.1.97 5.46 23 Apr 1998 Rewritten PCI detection routines [mj] @@ -205,6 +204,8 @@ Thanks to Tom Cavin (tec@usa1.com) for preliminary command-line option patches. + + New PCI detection code written by Martin Mares All of the alpha testers deserve much thanks. 
@@ -886,7 +887,6 @@ #endif #ifdef CONFIG_PCI printk( "\nTMC-3260 36C70 PCI scsi chip detection failed.\n" ); - printk( "Send mail to mckinley@msupa.pa.msu.edu.\n" ); #endif return 0; /* Cannot find valid set of ports */ } diff -u --recursive --new-file v2.1.98/linux/drivers/scsi/scsi.c linux/drivers/scsi/scsi.c --- v2.1.98/linux/drivers/scsi/scsi.c Tue Apr 14 14:29:23 1998 +++ linux/drivers/scsi/scsi.c Tue Apr 28 12:58:58 1998 @@ -248,6 +248,7 @@ {"SONY","CD-ROM CDU-55S","1.0i", BLIST_NOLUN}, {"SONY","CD-ROM CDU-561","1.7x", BLIST_NOLUN}, {"TANDBERG","TDC 3600","U07", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ +{"TEAC","CD-R55S","1.0H", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ {"TEAC","CD-ROM","1.06", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1 * for seagate controller, which causes * SCSI code to reset bus.*/ diff -u --recursive --new-file v2.1.98/linux/drivers/scsi/scsi_ioctl.c linux/drivers/scsi/scsi_ioctl.c --- v2.1.98/linux/drivers/scsi/scsi_ioctl.c Tue Apr 14 14:29:23 1998 +++ linux/drivers/scsi/scsi_ioctl.c Sat Apr 25 22:15:22 1998 @@ -167,7 +167,7 @@ * interface instead, as this is a more flexible approach to performing * generic SCSI commands on a device. 
*/ -static int ioctl_command(Scsi_Device *dev, Scsi_Ioctl_Command *sic) +int scsi_ioctl_send_command(Scsi_Device *dev, Scsi_Ioctl_Command *sic) { unsigned long flags; char * buf; @@ -387,7 +387,8 @@ return ioctl_probe(dev->host, arg); case SCSI_IOCTL_SEND_COMMAND: if(!suser()) return -EACCES; - return ioctl_command((Scsi_Device *) dev, (Scsi_Ioctl_Command *) arg); + return scsi_ioctl_send_command((Scsi_Device *) dev, + (Scsi_Ioctl_Command *) arg); case SCSI_IOCTL_DOORLOCK: if (!dev->removable || !dev->lockable) return 0; scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL; diff -u --recursive --new-file v2.1.98/linux/drivers/scsi/sg.c linux/drivers/scsi/sg.c --- v2.1.98/linux/drivers/scsi/sg.c Tue Apr 14 14:29:23 1998 +++ linux/drivers/scsi/sg.c Sat Apr 25 22:15:22 1998 @@ -97,6 +97,13 @@ return scsi_generics[dev].timeout; case SG_EMULATED_HOST: return put_user(scsi_generics[dev].device->host->hostt->emulated, (int *) arg); + case SCSI_IOCTL_SEND_COMMAND: + /* + Allow SCSI_IOCTL_SEND_COMMAND without checking suser() since the + user already has read/write access to the generic device and so + can execute arbitrary SCSI commands. 
+ */ + return scsi_ioctl_send_command(scsi_generics[dev].device, (void *) arg); default: return scsi_ioctl(scsi_generics[dev].device, cmd_in, (void *) arg); } diff -u --recursive --new-file v2.1.98/linux/fs/binfmt_elf.c linux/fs/binfmt_elf.c --- v2.1.98/linux/fs/binfmt_elf.c Sat Apr 25 18:13:12 1998 +++ linux/fs/binfmt_elf.c Sat Apr 25 22:51:31 1998 @@ -517,9 +517,10 @@ retval = PTR_ERR(interpreter_dentry); if (IS_ERR(interpreter_dentry)) goto out_free_interp; - - retval = read_exec(interpreter_dentry, 0, bprm->buf, - 128, 1); + retval = permission(interpreter_dentry->d_inode, MAY_EXEC); + if (retval < 0) + goto out_free_dentry; + retval = read_exec(interpreter_dentry, 0, bprm->buf, 128, 1); if (retval < 0) goto out_free_dentry; diff -u --recursive --new-file v2.1.98/linux/fs/dcache.c linux/fs/dcache.c --- v2.1.98/linux/fs/dcache.c Fri Apr 10 13:03:49 1998 +++ linux/fs/dcache.c Mon Apr 27 14:36:11 1998 @@ -426,42 +426,16 @@ } /* - * This is called from do_try_to_free_page() to indicate - * that we should reduce the dcache and inode cache memory. + * This is called from kswapd when we think we need some + * more memory, but aren't really sure how much. So we + * carefully try to free a _bit_ of our dcache, but not + * too much. */ -void shrink_dcache_memory() +void shrink_dcache_memory(void) { - dentry_stat.want_pages++; -} - -/* - * This carries out the request received by the above routine. - */ -void check_dcache_memory() -{ - if (dentry_stat.want_pages) { - unsigned int count, goal = 0; - /* - * Set the page goal. We don't necessarily need to trim - * the dcache just because the system needs memory ... 
- */ - if (page_cache_size > (num_physpages >> 1)) - goal = (dentry_stat.want_pages * page_cache_size) - / num_physpages; - dentry_stat.want_pages = 0; - if (goal) { - if (goal > 50) - goal = 50; - count = select_dcache(32, goal); -#ifdef DCACHE_DEBUG -printk(KERN_DEBUG "check_dcache_memory: goal=%d, count=%d\n", goal, count); -#endif - if (count) { - prune_dcache(count); - free_inode_memory(count); - } - } - } + int count = select_dcache(32, 8); + if (count) + prune_dcache(count); } #define NAME_ALLOC_LEN(len) ((len+16) & ~15) diff -u --recursive --new-file v2.1.98/linux/fs/ext2/namei.c linux/fs/ext2/namei.c --- v2.1.98/linux/fs/ext2/namei.c Mon Apr 6 17:41:00 1998 +++ linux/fs/ext2/namei.c Tue Apr 28 14:49:24 1998 @@ -572,6 +572,7 @@ ext2_warning (inode->i_sb, "empty_dir", "bad directory (dir #%lu) - no `.' or `..'", inode->i_ino); + brelse (bh); return 1; } offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); diff -u --recursive --new-file v2.1.98/linux/fs/namei.c linux/fs/namei.c --- v2.1.98/linux/fs/namei.c Mon Jan 12 15:03:28 1998 +++ linux/fs/namei.c Mon Apr 27 13:45:06 1998 @@ -454,7 +454,6 @@ char *name; struct dentry *dentry; - check_dcache_memory(); name = getname(pathname); dentry = (struct dentry *) name; if (!IS_ERR(name)) { @@ -528,7 +527,6 @@ struct inode *inode; struct dentry *dentry; - check_dcache_memory(); mode &= S_IALLUGO & ~current->fs->umask; mode |= S_IFREG; diff -u --recursive --new-file v2.1.98/linux/include/asm-alpha/checksum.h linux/include/asm-alpha/checksum.h --- v2.1.98/linux/include/asm-alpha/checksum.h Mon Apr 14 16:28:18 1997 +++ linux/include/asm-alpha/checksum.h Tue Apr 28 22:28:10 1998 @@ -18,6 +18,10 @@ unsigned short proto, unsigned int sum); +unsigned int csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr, + unsigned short len, unsigned short proto, + unsigned int sum); + /* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) @@ -54,6 +58,9 @@ * but continues and 
zeros the rest of the buffer. */ unsigned int csum_partial_copy_from_user(const char *src, char *dst, int len, unsigned int sum, int *errp); + +unsigned int csum_partial_copy_nocheck(const char *src, char *dst, int len, unsigned int sum); + /* * this routine is used for miscellaneous IP-like checksums, mainly diff -u --recursive --new-file v2.1.98/linux/include/asm-i386/bugs.h linux/include/asm-i386/bugs.h --- v2.1.98/linux/include/asm-i386/bugs.h Wed Apr 1 20:11:54 1998 +++ linux/include/asm-i386/bugs.h Thu Apr 30 12:51:33 1998 @@ -14,6 +14,10 @@ #include #include +#ifdef CONFIG_MTRR +# include +#endif + #define CONFIG_BUGi386 __initfunc(static void no_halt(char *s, int *ints)) @@ -236,4 +240,10 @@ check_amd_k6(); check_pentium_f00f(); system_utsname.machine[1] = '0' + boot_cpu_data.x86; +#if !defined(__SMP__) && defined(CONFIG_MTRR) + /* Must be done after other processors booted: at this point we are + called before SMP initialisation, so this is for the non-SMP case + only. The SMP case is handled in arch/i386/kernel/smp.c */ + mtrr_init (); +#endif } diff -u --recursive --new-file v2.1.98/linux/include/asm-i386/checksum.h linux/include/asm-i386/checksum.h --- v2.1.98/linux/include/asm-i386/checksum.h Fri Feb 7 05:54:54 1997 +++ linux/include/asm-i386/checksum.h Tue Apr 28 22:28:10 1998 @@ -1,6 +1,7 @@ #ifndef _I386_CHECKSUM_H #define _I386_CHECKSUM_H + /* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) @@ -44,6 +45,12 @@ return csum_partial_copy_generic ( src, dst, len, sum, err_ptr, dst_err_ptr); } +#if 0 + +/* Not used at the moment. 
It is difficult to imagine for what purpose + it can be used :-) Please, do not forget to verify_area before it --ANK + */ + /* * This combination is currently not used, but possible: */ @@ -56,6 +63,7 @@ return csum_partial_copy_generic ( src, dst, len, sum, src_err_ptr, err_ptr); } +#endif /* * These are the old (and unsafe) way of doing checksums, a warning message will be @@ -121,16 +129,12 @@ return (~sum) >> 16; } -/* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ - -static inline unsigned short int csum_tcpudp_magic(unsigned long saddr, +static inline unsigned long csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr, unsigned short len, unsigned short proto, - unsigned int sum) { + unsigned int sum) +{ __asm__(" addl %1, %0 adcl %2, %0 @@ -139,8 +143,22 @@ " : "=r" (sum) : "g" (daddr), "g"(saddr), "g"((ntohs(len)<<16)+proto*256), "0"(sum)); - return csum_fold(sum); + return sum; } + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline unsigned short int csum_tcpudp_magic(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + /* * this routine is used for miscellaneous IP-like checksums, mainly * in icmp.c diff -u --recursive --new-file v2.1.98/linux/include/asm-i386/mtrr.h linux/include/asm-i386/mtrr.h --- v2.1.98/linux/include/asm-i386/mtrr.h Wed Dec 31 16:00:00 1969 +++ linux/include/asm-i386/mtrr.h Wed Apr 29 22:46:59 1998 @@ -0,0 +1,103 @@ +/* Generic MTRR (Memory Type Range Register) ioctls. 
+ + Copyright (C) 1997-1998 Richard Gooch + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Richard Gooch may be reached by email at rgooch@atnf.csiro.au + The postal address is: + Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. +*/ +#ifndef _LINUX_MTRR_H +#define _LINUX_MTRR_H + +#include +#include + +#define MTRR_IOCTL_BASE 'M' + +struct mtrr_sentry +{ + unsigned long base; /* Base address */ + unsigned long size; /* Size of region */ + unsigned int type; /* Type of region */ +}; + +struct mtrr_gentry +{ + unsigned int regnum; /* Register number */ + unsigned long base; /* Base address */ + unsigned long size; /* Size of region */ + unsigned int type; /* Type of region */ +}; + +/* These are the various ioctls */ +#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) +#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) +#define MTRRIOC_DEL_ENTRY _IOW(MTRR_IOCTL_BASE, 2, struct mtrr_sentry) +#define MTRRIOC_GET_ENTRY _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry) + +/* These are the region types */ +#define MTRR_TYPE_UNCACHABLE 0 +#define MTRR_TYPE_WRCOMB 1 +/*#define MTRR_TYPE_ 2*/ +/*#define MTRR_TYPE_ 3*/ +#define MTRR_TYPE_WRTHROUGH 4 +#define MTRR_TYPE_WRPROT 5 +#define MTRR_TYPE_WRBACK 6 +#define MTRR_NUM_TYPES 7 + +#ifdef MTRR_NEED_STRINGS +static 
char *mtrr_strings[MTRR_NUM_TYPES] = +{ + "uncachable", /* 0 */ + "write-combining", /* 1 */ + "?", /* 2 */ + "?", /* 3 */ + "write-through", /* 4 */ + "write-protect", /* 5 */ + "write-back", /* 6 */ +}; +#endif + +#ifdef __KERNEL__ + +/* The following functions are for use by other drivers */ +# if defined(CONFIG_MTRR) || defined(CONFIG_MTRR_MODULE) +extern int mtrr_add (unsigned long base, unsigned long size, + unsigned int type, char increment); +extern int mtrr_del (int reg, unsigned long base, unsigned long size); +# else +static __inline__ int mtrr_add (unsigned long base, unsigned long size, + unsigned int type, char increment) +{ + return -ENODEV; +} +static __inline__ mtrr_del (int reg, unsigned long base, unsigned long size) +{ + return -ENODEV; +} +# endif + +/* The following functions are for initialisation: don't use them! */ +extern int mtrr_init (void); +# if defined(__SMP__) && defined(CONFIG_MTRR) +extern void mtrr_init_boot_cpu (void); +extern void mtrr_init_secondary_cpu (void); +# endif + +#endif + +#endif /* _LINUX_MTRR_H */ diff -u --recursive --new-file v2.1.98/linux/include/asm-i386/smp.h linux/include/asm-i386/smp.h --- v2.1.98/linux/include/asm-i386/smp.h Mon Apr 6 17:41:01 1998 +++ linux/include/asm-i386/smp.h Tue Apr 28 22:41:33 1998 @@ -168,6 +168,7 @@ extern unsigned long ipi_count; extern void smp_invalidate_rcv(void); /* Process an NMI */ extern void smp_local_timer_interrupt(struct pt_regs * regs); +extern void (*mtrr_hook) (void); extern void setup_APIC_clock (void); extern volatile int __cpu_logical_map[NR_CPUS]; extern inline int cpu_logical_map(int cpu) diff -u --recursive --new-file v2.1.98/linux/include/asm-m68k/checksum.h linux/include/asm-m68k/checksum.h --- v2.1.98/linux/include/asm-m68k/checksum.h Wed Apr 23 19:01:27 1997 +++ linux/include/asm-m68k/checksum.h Tue Apr 28 22:28:10 1998 @@ -87,8 +87,8 @@ * returns a 16-bit checksum, already complemented */ -static inline unsigned short int -csum_tcpudp_magic(unsigned long 
saddr, unsigned long daddr, unsigned short len, +static inline unsigned int +csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr, unsigned short len, unsigned short proto, unsigned int sum) { __asm__ ("addl %1,%0\n\t" @@ -99,7 +99,14 @@ : "=&d" (sum), "=&d" (saddr) : "0" (daddr), "1" (saddr), "d" (len + proto), "d"(sum)); - return csum_fold(sum); + return sum; +} + +static inline unsigned short int +csum_tcpudp_magic(unsigned long saddr, unsigned long daddr, unsigned short len, + unsigned short proto, unsigned int sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); } /* diff -u --recursive --new-file v2.1.98/linux/include/asm-ppc/checksum.h linux/include/asm-ppc/checksum.h --- v2.1.98/linux/include/asm-ppc/checksum.h Sat Aug 16 09:51:09 1997 +++ linux/include/asm-ppc/checksum.h Tue Apr 28 22:28:10 1998 @@ -34,6 +34,9 @@ #define csum_partial_copy_from_user(src, dst, len, sum, errp) \ csum_partial_copy_generic((src), (dst), (len), (sum), (errp), 0) +/* FIXME: this needs to be written to really do no check -- Cort */ +#define csum_partial_copy_nocheck(src, dst, len, sum) \ + csum_partial_copy_generic((src), (dst), (len), (sum), 0, 0) /* * Old versions which ignore errors. */ @@ -67,6 +70,27 @@ static inline unsigned short ip_compute_csum(unsigned char * buff, int len) { return csum_fold(csum_partial(buff, len, 0)); +} + +/* + * FIXME: I swiped this one from the sparc and made minor modifications. + * It may not be correct. 
-- Cort + */ +static inline unsigned long csum_tcpudp_nofold(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + __asm__(" + add %0,%0,%1 + add %0,%0,%2 + add %0,%0,%0 + addi %0,%0,0 + " + : "=r" (sum) + : "r" (daddr), "r"(saddr), "r"((ntohs(len)<<16)+proto*256), "0"(sum)); + return sum; } /* diff -u --recursive --new-file v2.1.98/linux/include/asm-sparc/checksum.h linux/include/asm-sparc/checksum.h --- v2.1.98/linux/include/asm-sparc/checksum.h Mon Apr 14 16:28:19 1997 +++ linux/include/asm-sparc/checksum.h Tue Apr 28 22:28:10 1998 @@ -1,4 +1,4 @@ -/* $Id: checksum.h,v 1.27 1997/04/11 00:42:18 davem Exp $ */ +/* $Id: checksum.h,v 1.28 1998/04/17 02:37:25 davem Exp $ */ #ifndef __SPARC_CHECKSUM_H #define __SPARC_CHECKSUM_H @@ -158,10 +158,22 @@ return sum; } -/* computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ -extern __inline__ unsigned short csum_tcpudp_magic(unsigned long saddr, +/* Fold a partial checksum without adding pseudo headers. */ +extern __inline__ unsigned int csum_fold(unsigned int sum) +{ + unsigned int tmp; + + __asm__ __volatile__("addcc\t%0, %1, %1\n\t" + "srl\t%1, 16, %1\n\t" + "addx\t%1, %%g0, %1\n\t" + "xnor\t%%g0, %1, %0" + : "=&r" (sum), "=r" (tmp) + : "0" (sum), "1" (sum<<16) + : "cc"); + return sum; +} + +extern __inline__ unsigned long csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr, unsigned int len, unsigned short proto, @@ -171,11 +183,6 @@ "addxcc\t%2, %0, %0\n\t" "addxcc\t%3, %0, %0\n\t" "addx\t%0, %%g0, %0\n\t" - "sll\t%0, 16, %1\n\t" - "addcc\t%1, %0, %0\n\t" - "srl\t%0, 16, %0\n\t" - "addx\t%0, %%g0, %0\n\t" - "xnor\t%%g0, %0, %0" : "=r" (sum), "=r" (saddr) : "r" (daddr), "r" ((proto<<16)+len), "0" (sum), "1" (saddr) @@ -183,19 +190,17 @@ return sum; } -/* Fold a partial checksum without adding pseudo headers. 
*/ -extern __inline__ unsigned int csum_fold(unsigned int sum) +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline unsigned short int csum_tcpudp_magic(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) { - unsigned int tmp; - - __asm__ __volatile__("addcc\t%0, %1, %1\n\t" - "srl\t%1, 16, %1\n\t" - "addx\t%1, %%g0, %1\n\t" - "xnor\t%%g0, %1, %0" - : "=&r" (sum), "=r" (tmp) - : "0" (sum), "1" (sum<<16) - : "cc"); - return sum; + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); } #define _HAVE_ARCH_IPV6_CSUM diff -u --recursive --new-file v2.1.98/linux/include/asm-sparc64/checksum.h linux/include/asm-sparc64/checksum.h --- v2.1.98/linux/include/asm-sparc64/checksum.h Sat Aug 16 09:51:10 1997 +++ linux/include/asm-sparc64/checksum.h Tue Apr 28 22:28:10 1998 @@ -1,4 +1,4 @@ -/* $Id: checksum.h,v 1.10 1997/08/09 18:09:03 jj Exp $ */ +/* $Id: checksum.h,v 1.11 1998/04/17 02:37:22 davem Exp $ */ #ifndef __SPARC64_CHECKSUM_H #define __SPARC64_CHECKSUM_H @@ -116,10 +116,23 @@ return sum; } -/* computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ -extern __inline__ unsigned short csum_tcpudp_magic(unsigned long saddr, +/* Fold a partial checksum without adding pseudo headers. 
*/ +extern __inline__ unsigned short csum_fold(unsigned int sum) +{ + unsigned int tmp; + + __asm__ __volatile__(" + addcc %0, %1, %1 + srl %1, 16, %1 + addc %1, %%g0, %1 + xnor %%g0, %1, %0 +" : "=&r" (sum), "=r" (tmp) + : "0" (sum), "1" (sum<<16) + : "cc"); + return (sum & 0xffff); +} + +extern __inline__ unsigned long csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr, unsigned int len, unsigned short proto, @@ -130,31 +143,23 @@ addccc %2, %0, %0 addccc %3, %0, %0 addc %0, %%g0, %0 - sll %0, 16, %1 - addcc %1, %0, %0 - srl %0, 16, %0 - addc %0, %%g0, %0 - xnor %%g0, %0, %0 " : "=r" (sum), "=r" (saddr) : "r" (daddr), "r" ((proto<<16)+len), "0" (sum), "1" (saddr) : "cc"); - return (sum & 0xffff); + return sum; } -/* Fold a partial checksum without adding pseudo headers. */ -extern __inline__ unsigned short csum_fold(unsigned int sum) +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline unsigned short int csum_tcpudp_magic(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) { - unsigned int tmp; - - __asm__ __volatile__(" - addcc %0, %1, %1 - srl %1, 16, %1 - addc %1, %%g0, %1 - xnor %%g0, %1, %0 -" : "=&r" (sum), "=r" (tmp) - : "0" (sum), "1" (sum<<16) - : "cc"); - return (sum & 0xffff); + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); } #define _HAVE_ARCH_IPV6_CSUM diff -u --recursive --new-file v2.1.98/linux/include/linux/if_ec.h linux/include/linux/if_ec.h --- v2.1.98/linux/include/linux/if_ec.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/if_ec.h Tue Apr 28 11:10:10 1998 @@ -0,0 +1,47 @@ +/* Definitions for Econet sockets. */ + +#ifndef __LINUX_IF_EC +#define __LINUX_IF_EC + +/* User visible stuff. Glibc provides its own but libc5 folk will use these */ + +struct ec_addr +{ + unsigned char station; /* Station number. */ + unsigned char net; /* Network number. 
*/ +}; + +struct sockaddr_ec +{ + unsigned short sec_family; + unsigned char port; /* Port number. */ + unsigned char cb; /* Control/flag byte. */ + unsigned char type; /* Type of message. */ + struct ec_addr addr; + unsigned long cookie; +}; + +#define ECTYPE_PACKET_RECEIVED 0 /* Packet received */ +#define ECTYPE_TRANSMIT_STATUS 0x10 /* Transmit completed, + low nibble holds status */ + +#define ECTYPE_TRANSMIT_OK 1 +#define ECTYPE_TRANSMIT_NOT_LISTENING 2 +#define ECTYPE_TRANSMIT_NET_ERROR 3 +#define ECTYPE_TRANSMIT_NO_CLOCK 4 +#define ECTYPE_TRANSMIT_LINE_JAMMED 5 +#define ECTYPE_TRANSMIT_NOT_PRESENT 6 + +#ifdef __KERNEL__ + +struct econet_opt +{ + unsigned char cb; + unsigned char port; + unsigned char station; + unsigned char net; +}; + +#endif + +#endif diff -u --recursive --new-file v2.1.98/linux/include/linux/netdevice.h linux/include/linux/netdevice.h --- v2.1.98/linux/include/linux/netdevice.h Thu Mar 26 15:57:05 1998 +++ linux/include/linux/netdevice.h Thu Apr 30 12:52:26 1998 @@ -256,6 +256,7 @@ struct Qdisc *qdisc; struct Qdisc *qdisc_sleeping; + struct Qdisc *qdisc_list; unsigned long tx_queue_len; /* Max frames per queue allowed */ /* Pointers to interface service routines. 
*/ diff -u --recursive --new-file v2.1.98/linux/include/linux/netlink.h linux/include/linux/netlink.h --- v2.1.98/linux/include/linux/netlink.h Tue Mar 10 10:03:35 1998 +++ linux/include/linux/netlink.h Tue Apr 28 11:10:10 1998 @@ -161,7 +161,7 @@ } #define NLMSG_PUT(skb, pid, seq, type, len) \ -({ if (skb_tailroom(skb) < NLMSG_SPACE(len)) goto nlmsg_failure; \ +({ if (skb_tailroom(skb) < (int)NLMSG_SPACE(len)) goto nlmsg_failure; \ __nlmsg_put(skb, pid, seq, type, len); }) extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, diff -u --recursive --new-file v2.1.98/linux/include/linux/pkt_cls.h linux/include/linux/pkt_cls.h --- v2.1.98/linux/include/linux/pkt_cls.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/pkt_cls.h Tue Apr 28 11:10:10 1998 @@ -0,0 +1,117 @@ +#ifndef __LINUX_PKT_CLS_H +#define __LINUX_PKT_CLS_H + +struct tc_police +{ + __u32 index; + int action; +#define TC_POLICE_UNSPEC (-1) +#define TC_POLICE_OK 0 +#define TC_POLICE_RECLASSIFY 1 +#define TC_POLICE_SHOT 2 + + __u32 limit; + __u32 burst; + __u32 mtu; + struct tc_ratespec rate; + struct tc_ratespec peakrate; +}; + +enum +{ + TCA_POLICE_UNSPEC, + TCA_POLICE_TBF, + TCA_POLICE_RATE, + TCA_POLICE_PEAKRATE, +}; + +#define TCA_POLICE_MAX TCA_POLICE_PEAKRATE + +/* U32 filters */ + +#define TC_U32_HTID(h) ((h)&0xFFF00000) +#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20) +#define TC_U32_HASH(h) (((h)>>12)&0xFF) +#define TC_U32_NODE(h) ((h)&0xFFF) +#define TC_U32_KEY(h) ((h)&0xFFFFF) +#define TC_U32_UNSPEC 0 +#define TC_U32_ROOT (0xFFF00000) + +enum +{ + TCA_U32_UNSPEC, + TCA_U32_CLASSID, + TCA_U32_HASH, + TCA_U32_LINK, + TCA_U32_DIVISOR, + TCA_U32_SEL, + TCA_U32_POLICE, +}; + +#define TCA_U32_MAX TCA_U32_POLICE + +struct tc_u32_key +{ + __u32 mask; + __u32 val; + int off; + int offmask; +}; + +struct tc_u32_sel +{ + unsigned char flags; + unsigned char offshift; + unsigned char nkeys; + + __u16 offmask; + __u16 off; + short offoff; + + short hoff; + __u32 hmask; + + struct tc_u32_key 
keys[0]; +}; + +/* Flags */ + +#define TC_U32_TERMINAL 1 +#define TC_U32_OFFSET 2 +#define TC_U32_VAROFFSET 4 +#define TC_U32_EAT 8 + +#define TC_U32_MAXDEPTH 8 + + +/* RSVP filter */ + +enum +{ + TCA_RSVP_UNSPEC, + TCA_RSVP_CLASSID, + TCA_RSVP_DST, + TCA_RSVP_SRC, + TCA_RSVP_PINFO, + TCA_RSVP_POLICE, +}; + +#define TCA_RSVP_MAX TCA_RSVP_POLICE + +struct tc_rsvp_gpi +{ + __u32 key; + __u32 mask; + int offset; +}; + +struct tc_rsvp_pinfo +{ + struct tc_rsvp_gpi dpi; + struct tc_rsvp_gpi spi; + __u8 protocol; + __u8 tunnelid; + __u8 tunnelhdr; +}; + +#endif diff -u --recursive --new-file v2.1.98/linux/include/linux/pkt_sched.h linux/include/linux/pkt_sched.h --- v2.1.98/linux/include/linux/pkt_sched.h Sun Nov 30 14:00:38 1997 +++ linux/include/linux/pkt_sched.h Tue Apr 28 11:10:10 1998 @@ -1,15 +1,17 @@ #ifndef __LINUX_PKT_SCHED_H #define __LINUX_PKT_SCHED_H -#define PSCHED_TC_INIT 1 -#define PSCHED_TC_DESTROY 2 -#define PSCHED_TC_ATTACH 3 -#define PSCHED_TC_DETACH 4 - - -/* "Logical" priority bands, not depending of concrete packet scheduler. - Every scheduler will map them to real traffic classes, if it have - no more precise machanism. +/* Logical priority bands not depending on specific packet scheduler. + Every scheduler will map them to real traffic classes, if it has + no more precise mechanism to classify packets. + + These numbers have no special meaning, though their coincidence + with obsolete IPv6 values is not occasional :-). New IPv6 drafts + preferred full anarchy inspired by diffserv group. + + Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy + class, actually, as rule it will be handled with more care than + filler or even bulk. */ #define TC_PRIO_BESTEFFORT 0 @@ -19,75 +21,257 @@ #define TC_PRIO_INTERACTIVE 6 #define TC_PRIO_CONTROL 7 +#define TC_PRIO_MAX 15 + +/* Generic queue statistics, available for all the elements. + Particular schedulers may have also their private records. 
+ */ + +struct tc_stats +{ + __u64 bytes; /* Number of enqueued bytes */ + __u32 packets; /* Number of enqueued packets */ + __u32 drops; /* Packets dropped because of lack of resources */ + __u32 overlimits; /* Number of throttle events when this + * flow goes out of allocated bandwidth */ + __u32 bps; /* Current flow byte rate */ + __u32 pps; /* Current flow packet rate */ + __u32 qlen; + __u32 backlog; +}; -struct pschedctl +struct tc_estimator { - int command; - int handle; - int child; - int ifindex; - char id[IFNAMSIZ]; - int arglen; - char args[0]; + char interval; + unsigned char ewma_log; }; -/* CBQ section */ +/* "Handles" + --------- + + All the traffic control objects have 32bit identifiers, or "handles". + + They can be considered as opaque numbers from user API viewpoint, + but actually they always consist of two fields: major and + minor numbers, which are interpreted by kernel specially, + that may be used by applications, though not recommended. + + E.g. qdisc handles always have minor number equal to zero, + classes (or flows) have major equal to parent qdisc major, and + minor uniquely identifying class inside qdisc.
+ + Macros to manipulate handles: + */ + +#define TC_H_MAJ_MASK (0xFFFF0000U) +#define TC_H_MIN_MASK (0x0000FFFFU) +#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK) +#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK) +#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK)) + +#define TC_H_UNSPEC (0U) +#define TC_H_ROOT (0xFFFFFFFFU) + +struct tc_ratespec +{ + unsigned char cell_log; + unsigned char __reserved; + unsigned short feature; + short addend; + unsigned short mpu; + __u32 rate; +}; + +/* FIFO section */ + +struct tc_fifo_qopt +{ + __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ +}; + +/* PRIO section */ + +#define TCQ_PRIO_BANDS 16 -#define CBQ_MAXPRIO 8 -#define CBQ_MAXLEVEL 8 +struct tc_prio_qopt +{ + int bands; /* Number of bands */ + __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ +}; /* CSZ section */ -struct cszctl +struct tc_csz_qopt +{ + int flows; /* Maximal number of guaranteed flows */ + unsigned char R_log; /* Fixed point position for round number */ + unsigned char delta_log; /* Log of maximal managed time interval */ + __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> CSZ band */ +}; + +struct tc_csz_copt { - int flow_id; - int handle; - unsigned long rate; - unsigned long max_bytes; - unsigned long depth; - unsigned long L_tab[256]; + struct tc_ratespec slice; + struct tc_ratespec rate; + struct tc_ratespec peakrate; + __u32 limit; + __u32 buffer; + __u32 mtu; }; -struct cszinitctl +enum { - int flows; - unsigned cell_log; + TCA_CSZ_UNSPEC, + TCA_CSZ_PARMS, + TCA_CSZ_RTAB, + TCA_CSZ_PTAB, }; /* TBF section */ -struct tbfctl +struct tc_tbf_qopt +{ + struct tc_ratespec rate; + struct tc_ratespec peakrate; + __u32 limit; + __u32 buffer; + __u32 mtu; +}; + +enum { - unsigned cell_log; - unsigned long bytes; - unsigned long depth; - unsigned long L_tab[256]; + TCA_TBF_UNSPEC, + TCA_TBF_PARMS, + TCA_TBF_RTAB, + TCA_TBF_PTAB, }; + +/* TEQL section */ + +/* TEQL does not require any parameters */ + /* 
SFQ section */ -struct sfqctl +struct tc_sfq_qopt { - unsigned quantum; - unsigned depth; - unsigned divisor; - unsigned flows; + unsigned quantum; /* Bytes per round allocated to flow */ + int perturb_period; /* Period of hash perturbation */ + __u32 limit; /* Maximal packets in queue */ + unsigned divisor; /* Hash divisor */ + unsigned flows; /* Maximal number of flows */ }; +/* + * NOTE: limit, divisor and flows are hardwired to code at the moment. + * + * limit=flows=128, divisor=1024; + * + * The only reason for this is efficiency, it is possible + * to change these parameters in compile time. + */ + /* RED section */ -struct redctl +enum +{ + TCA_RED_UNSPEC, + TCA_RED_PARMS, + TCA_RED_STAB, +}; + +struct tc_red_qopt +{ + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ +}; + +/* CBQ section */ + +#define TC_CBQ_MAXPRIO 8 +#define TC_CBQ_MAXLEVEL 8 +#define TC_CBQ_DEF_EWMA 5 + +struct tc_cbq_lssopt +{ + unsigned char change; + unsigned char flags; +#define TCF_CBQ_LSS_BOUNDED 1 +#define TCF_CBQ_LSS_ISOLATED 2 + unsigned char ewma_log; + unsigned char level; +#define TCF_CBQ_LSS_FLAGS 1 +#define TCF_CBQ_LSS_EWMA 2 +#define TCF_CBQ_LSS_MAXIDLE 4 +#define TCF_CBQ_LSS_MINIDLE 8 +#define TCF_CBQ_LSS_OFFTIME 0x10 +#define TCF_CBQ_LSS_AVPKT 0x20 + __u32 maxidle; + __u32 minidle; + __u32 offtime; + __u32 avpkt; +}; + +struct tc_cbq_wrropt +{ + unsigned char flags; + unsigned char priority; + unsigned char cpriority; + unsigned char __reserved; + __u32 allot; + __u32 weight; +}; + +struct tc_cbq_ovl +{ + unsigned char strategy; +#define TC_CBQ_OVL_CLASSIC 0 +#define TC_CBQ_OVL_DELAY 1 +#define TC_CBQ_OVL_LOWPRIO 2 +#define TC_CBQ_OVL_DROP 3 +#define TC_CBQ_OVL_RCLASSIC 4 + unsigned char priority2; 
+ __u32 penalty; +}; + +struct tc_cbq_police +{ + unsigned char police; + unsigned char __res1; + unsigned short __res2; +}; + +struct tc_cbq_fopt +{ + __u32 split; + __u32 defmap; + __u32 defchange; +}; + +struct tc_cbq_xstats +{ + __u32 borrows; + __u32 overactions; + __s32 avgidle; + __s32 undertime; +}; + +enum { - unsigned qmaxbytes; /* HARD maximal queue length */ - unsigned qth_min; /* Min average length threshold: A scaled */ - unsigned qth_max; /* Max average length threshold: A scaled */ - char Alog; /* Point position in average lengths */ - char Wlog; /* log(W) */ - char Rlog; /* random number bits */ - char C1log; /* log(1/C1) */ - char Slog; - char Stab[256]; + TCA_CBQ_UNSPEC, + TCA_CBQ_LSSOPT, + TCA_CBQ_WRROPT, + TCA_CBQ_FOPT, + TCA_CBQ_OVL_STRATEGY, + TCA_CBQ_RATE, + TCA_CBQ_RTAB, + TCA_CBQ_POLICE, }; +#define TCA_CBQ_MAX TCA_CBQ_POLICE #endif diff -u --recursive --new-file v2.1.98/linux/include/linux/proc_fs.h linux/include/linux/proc_fs.h --- v2.1.98/linux/include/linux/proc_fs.h Thu Apr 23 20:21:38 1998 +++ linux/include/linux/proc_fs.h Thu Apr 30 12:51:42 1998 @@ -49,7 +49,8 @@ PROC_SLABINFO, PROC_PARPORT, PROC_PPC_HTAB, - PROC_SOUND + PROC_SOUND, + PROC_MTRR, /* whether enabled or not */ }; enum pid_directory_inos { diff -u --recursive --new-file v2.1.98/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h --- v2.1.98/linux/include/linux/rtnetlink.h Thu Mar 26 15:57:06 1998 +++ linux/include/linux/rtnetlink.h Tue Apr 28 11:10:10 1998 @@ -37,12 +37,12 @@ #define RTM_GETRULE (RTM_BASE+18) #define RTM_NEWQDISC (RTM_BASE+20) -#define RTM_DELQDSIC (RTM_BASE+21) +#define RTM_DELQDISC (RTM_BASE+21) #define RTM_GETQDISC (RTM_BASE+22) -#define RTM_NEWTFLOW (RTM_BASE+24) -#define RTM_DELTFLOW (RTM_BASE+25) -#define RTM_GETTFLOW (RTM_BASE+26) +#define RTM_NEWTCLASS (RTM_BASE+24) +#define RTM_DELTCLASS (RTM_BASE+25) +#define RTM_GETTCLASS (RTM_BASE+26) #define RTM_NEWTFILTER (RTM_BASE+28) #define RTM_DELTFILTER (RTM_BASE+29) @@ -533,10 +533,11 @@ 
TCA_KIND, TCA_OPTIONS, TCA_STATS, - TCA_XSTATS + TCA_XSTATS, + TCA_RATE, }; -#define TCA_MAX TCA_XSTATS +#define TCA_MAX TCA_RATE #define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) @@ -551,6 +552,7 @@ #define RTMGRP_LINK 1 #define RTMGRP_NOTIFY 2 #define RTMGRP_NEIGH 4 +#define RTMGRP_TC 8 #define RTMGRP_IPV4_IFADDR 0x10 #define RTMGRP_IPV4_MROUTE 0x20 @@ -567,6 +569,14 @@ extern atomic_t rtnl_rlockct; extern struct wait_queue *rtnl_wait; +extern __inline__ int rtattr_strcmp(struct rtattr *rta, char *str) +{ + int len = strlen(str) + 1; + return len > rta->rta_len || memcmp(RTA_DATA(rta), str, len); +} + +extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len); + #ifdef CONFIG_RTNETLINK extern struct sock *rtnl; @@ -578,12 +588,12 @@ extern struct rtnetlink_link * rtnetlink_links[NPROTO]; extern int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb); - +extern int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo); extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data); #define RTA_PUT(skb, attrtype, attrlen, data) \ -({ if (skb_tailroom(skb) < RTA_SPACE(attrlen)) goto rtattr_failure; \ +({ if (skb_tailroom(skb) < (int)RTA_SPACE(attrlen)) goto rtattr_failure; \ __rta_fill(skb, attrtype, attrlen, data); }) extern unsigned long rtnl_wlockct; diff -u --recursive --new-file v2.1.98/linux/include/linux/smp.h linux/include/linux/smp.h --- v2.1.98/linux/include/linux/smp.h Fri Jan 23 18:10:32 1998 +++ linux/include/linux/smp.h Tue Apr 28 22:41:33 1998 @@ -47,7 +47,8 @@ #define MSG_STOP_CPU 0x0002 /* Sent to shut down slave CPU's * when rebooting */ -#define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU */ +#define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU*/ +#define MSG_MTRR_CHANGE 0x0004 /* Change MTRR */ #else diff -u --recursive 
--new-file v2.1.98/linux/include/linux/socket.h linux/include/linux/socket.h --- v2.1.98/linux/include/linux/socket.h Tue Apr 14 14:29:26 1998 +++ linux/include/linux/socket.h Tue Apr 28 11:10:10 1998 @@ -60,7 +60,7 @@ #define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) ) -#define CMSG_DATA(cmsg) ((void *)(cmsg) + CMSG_ALIGN(sizeof(struct cmsghdr))) +#define CMSG_DATA(cmsg) ((void *)((char *)(cmsg) + CMSG_ALIGN(sizeof(struct cmsghdr)))) #define CMSG_SPACE(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + CMSG_ALIGN(len)) #define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + (len)) @@ -75,6 +75,10 @@ #ifdef __KERNEL__ #define __KINLINE extern __inline__ +#elif defined(__GNUC__) +#define __KINLINE static __inline__ +#elif defined(__cplusplus) +#define __KINLINE static inline #else #define __KINLINE static #endif @@ -138,7 +142,7 @@ #define AF_APPLETALK 5 /* Appletalk DDP */ #define AF_NETROM 6 /* Amateur Radio NET/ROM */ #define AF_BRIDGE 7 /* Multiprotocol bridge */ -#define AF_AAL5 8 /* Reserved for Werner's ATM */ +#define AF_ATMPVC 8 /* ATM PVCs */ #define AF_X25 9 /* Reserved for X.25 project */ #define AF_INET6 10 /* IP version 6 */ #define AF_ROSE 11 /* Amateur Radio X.25 PLP */ @@ -151,6 +155,7 @@ #define AF_PACKET 17 /* Packet family */ #define AF_ASH 18 /* Ash */ #define AF_ECONET 19 /* Acorn Econet */ +#define AF_ATMSVC 20 /* ATM SVCs */ #define AF_MAX 32 /* For now.. */ /* Protocol families, same as address families. 
*/ @@ -163,7 +168,7 @@ #define PF_APPLETALK AF_APPLETALK #define PF_NETROM AF_NETROM #define PF_BRIDGE AF_BRIDGE -#define PF_AAL5 AF_AAL5 +#define PF_ATMPVC AF_ATMPVC #define PF_X25 AF_X25 #define PF_INET6 AF_INET6 #define PF_ROSE AF_ROSE @@ -175,6 +180,7 @@ #define PF_ROUTE AF_ROUTE #define PF_PACKET AF_PACKET #define PF_ASH AF_ASH +#define PF_ATMSVC AF_ATMSVC #define PF_MAX AF_MAX @@ -223,6 +229,8 @@ #define SOL_DECNET 261 #define SOL_X25 262 #define SOL_PACKET 263 +#define SOL_ATM 264 /* ATM layer (cell level) */ +#define SOL_AAL 265 /* ATM Adaption Layer (packet level) */ /* IPX options */ #define IPX_TYPE 1 diff -u --recursive --new-file v2.1.98/linux/include/linux/sysctl.h linux/include/linux/sysctl.h --- v2.1.98/linux/include/linux/sysctl.h Tue Apr 14 14:29:26 1998 +++ linux/include/linux/sysctl.h Tue Apr 28 11:10:10 1998 @@ -72,7 +72,6 @@ KERN_STATINODE, KERN_DENTRY, /* dentry statistics */ KERN_MODPROBE, - KERN_KMOD_UNLOAD_DELAY }; @@ -166,7 +165,6 @@ NET_IPV4_TCP_KEEPALIVE_PROBES, NET_IPV4_TCP_RETRIES1, NET_IPV4_TCP_RETRIES2, - NET_IPV4_TCP_MAX_DELAY_ACKS, NET_IPV4_TCP_FIN_TIMEOUT, NET_IPV4_IP_MASQ_DEBUG, NET_TCP_SYNCOOKIES, diff -u --recursive --new-file v2.1.98/linux/include/linux/wanrouter.h linux/include/linux/wanrouter.h --- v2.1.98/linux/include/linux/wanrouter.h Fri Feb 6 15:35:44 1998 +++ linux/include/linux/wanrouter.h Thu Apr 30 12:54:52 1998 @@ -359,10 +359,10 @@ } wan_device_t; /* Public functions available for device drivers */ -extern int register_wandev (wan_device_t* wandev); -extern int unregister_wandev (char* name); -unsigned short wan_type_trans (struct sk_buff* skb, struct device* dev); -int wan_encapsulate (struct sk_buff* skb, struct device* dev); +extern int register_wan_device(wan_device_t* wandev); +extern int unregister_wan_device(char* name); +unsigned short wanrouter_type_trans(struct sk_buff* skb, struct device* dev); +int wanrouter_encapsulate(struct sk_buff* skb, struct device* dev); /* Proc interface functions. 
These must not be called by the drivers! */ extern int wanrouter_proc_init (void); diff -u --recursive --new-file v2.1.98/linux/include/net/dst.h linux/include/net/dst.h --- v2.1.98/linux/include/net/dst.h Tue Mar 17 22:18:15 1998 +++ linux/include/net/dst.h Thu Apr 30 12:54:02 1998 @@ -8,6 +8,7 @@ #ifndef _NET_DST_H #define _NET_DST_H +#include #include /* @@ -50,6 +51,10 @@ int (*input)(struct sk_buff*); int (*output)(struct sk_buff*); + +#ifdef CONFIG_NET_CLS_ROUTE + __u32 tclassid; +#endif struct dst_ops *ops; diff -u --recursive --new-file v2.1.98/linux/include/net/ip.h linux/include/net/ip.h --- v2.1.98/linux/include/net/ip.h Wed Apr 8 19:36:29 1998 +++ linux/include/net/ip.h Thu Apr 30 12:54:03 1998 @@ -100,7 +100,6 @@ #define ip_acct_output dev_queue_xmit #endif extern void ip_fragment(struct sk_buff *skb, int (*out)(struct sk_buff*)); -extern struct sk_buff * ip_reply(struct sk_buff *skb, int payload); extern int ip_do_nat(struct sk_buff *skb); extern void ip_send_check(struct iphdr *ip); extern int ip_id_count; @@ -116,6 +115,18 @@ struct ipcm_cookie *ipc, struct rtable *rt, int flags); + + +struct ip_reply_arg { + struct iovec iov[2]; + int n_iov; /* redundant */ + u32 csum; + int csumoffset; /* u16 offset of csum in iov[0].iov_base */ + /* -1 if not needed */ +}; + +void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, + unsigned int len); extern int __ip_finish_output(struct sk_buff *skb); diff -u --recursive --new-file v2.1.98/linux/include/net/ip_fib.h linux/include/net/ip_fib.h --- v2.1.98/linux/include/net/ip_fib.h Tue Mar 10 10:03:35 1998 +++ linux/include/net/ip_fib.h Tue Apr 28 11:10:10 1998 @@ -50,6 +50,9 @@ int nh_weight; int nh_power; #endif +#ifdef CONFIG_NET_CLS_ROUTE + __u32 nh_tclassid; +#endif int nh_oif; u32 nh_gw; }; @@ -229,9 +232,11 @@ extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb); extern u32 
fib_rules_map_destination(u32 daddr, struct fib_result *res); +#ifdef CONFIG_NET_CLS_ROUTE +extern u32 fib_rules_tclass(struct fib_result *res); +#endif extern u32 fib_rules_policy(u32 saddr, struct fib_result *res, unsigned *flags); extern void fib_rules_init(void); #endif - #endif _NET_FIB_H diff -u --recursive --new-file v2.1.98/linux/include/net/ip_masq.h linux/include/net/ip_masq.h --- v2.1.98/linux/include/net/ip_masq.h Mon Feb 23 18:12:12 1998 +++ linux/include/net/ip_masq.h Thu Apr 30 12:54:17 1998 @@ -24,11 +24,6 @@ * I used an extra 4K port-space */ -/* - * Linux ports don't normally get allocated above 32K. - * I used an extra 4K port-space - */ - #define PORT_MASQ_BEGIN 61000 #define PORT_MASQ_END (PORT_MASQ_BEGIN+4096) diff -u --recursive --new-file v2.1.98/linux/include/net/pkt_cls.h linux/include/net/pkt_cls.h --- v2.1.98/linux/include/net/pkt_cls.h Wed Dec 31 16:00:00 1969 +++ linux/include/net/pkt_cls.h Tue Apr 28 11:10:10 1998 @@ -0,0 +1,83 @@ +#ifndef __NET_PKT_CLS_H +#define __NET_PKT_CLS_H + + +#include + +struct rtattr; +struct tcmsg; + +/* Basic packet classifier frontend definitions. 
*/ + +struct tcf_result +{ + unsigned long class; + u32 classid; +}; + +struct tcf_proto +{ + /* Fast access part */ + struct tcf_proto *next; + void *root; + int (*classify)(struct sk_buff*, struct tcf_proto*, struct tcf_result *); + u32 protocol; + + /* All the rest */ + u32 prio; + u32 classid; + struct Qdisc *q; + void *data; + struct tcf_proto_ops *ops; +}; + +struct tcf_walker +{ + int stop; + int skip; + int count; + int (*fn)(struct tcf_proto *, unsigned long node, struct tcf_walker *); +}; + +struct tcf_proto_ops +{ + struct tcf_proto_ops *next; + char kind[IFNAMSIZ]; + + int (*classify)(struct sk_buff*, struct tcf_proto*, struct tcf_result *); + int (*init)(struct tcf_proto*); + void (*destroy)(struct tcf_proto*); + + unsigned long (*get)(struct tcf_proto*, u32 handle); + void (*put)(struct tcf_proto*, unsigned long); + int (*change)(struct tcf_proto*, u32 handle, struct rtattr **, unsigned long *); + int (*delete)(struct tcf_proto*, unsigned long); + void (*walk)(struct tcf_proto*, struct tcf_walker *arg); + + /* rtnetlink specific */ + int (*dump)(struct tcf_proto*, unsigned long, struct sk_buff *skb, struct tcmsg*); +}; + +/* Main classifier routine: scans classifier chain attached + to this qdisc, (optionally) tests for protocol and asks + specific classifiers. 
+ */ + +extern __inline__ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) +{ + int err = 0; + u32 protocol = skb->protocol; + + for ( ; tp; tp = tp->next) { + if ((tp->protocol == protocol || + tp->protocol == __constant_htons(ETH_P_ALL)) && + (err = tp->classify(skb, tp, res)) >= 0) + return err; + } + return -1; +} + +extern int register_tcf_proto_ops(struct tcf_proto_ops *ops); +extern int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); + +#endif diff -u --recursive --new-file v2.1.98/linux/include/net/pkt_sched.h linux/include/net/pkt_sched.h --- v2.1.98/linux/include/net/pkt_sched.h Mon Jan 12 15:28:19 1998 +++ linux/include/net/pkt_sched.h Tue Apr 28 11:10:10 1998 @@ -1,21 +1,64 @@ #ifndef __NET_PKT_SCHED_H #define __NET_PKT_SCHED_H +#define PSCHED_GETTIMEOFDAY 1 +#define PSCHED_JIFFIES 2 +#define PSCHED_CPU 3 + +#define PSCHED_CLOCK_SOURCE PSCHED_GETTIMEOFDAY + #include +#include + +struct rtattr; +struct Qdisc; + +struct qdisc_walker +{ + int stop; + int skip; + int count; + int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *); +}; + +struct Qdisc_class_ops +{ + /* Child qdisc manipulation */ + int (*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **); + + /* Class manipulation routines */ + unsigned long (*get)(struct Qdisc *, u32 classid); + void (*put)(struct Qdisc *, unsigned long); + int (*change)(struct Qdisc *, u32, u32, struct rtattr **, unsigned long *); + int (*delete)(struct Qdisc *, unsigned long); + void (*walk)(struct Qdisc *, struct qdisc_walker * arg); + + /* Filter manipulation */ + struct tcf_proto ** (*tcf_chain)(struct Qdisc *, unsigned long); + unsigned long (*bind_tcf)(struct Qdisc *, u32 classid); + void (*unbind_tcf)(struct Qdisc *, unsigned long); + + /* rtnetlink specific */ + int (*dump)(struct Qdisc *, unsigned long, struct sk_buff *skb, struct tcmsg*); +}; struct Qdisc_ops { struct Qdisc_ops *next; + struct Qdisc_class_ops *cl_ops; char id[IFNAMSIZ]; - 
int refcnt; int priv_size; - int (*enqueue)(struct sk_buff *skb, struct Qdisc *); + + int (*enqueue)(struct sk_buff *, struct Qdisc *); struct sk_buff * (*dequeue)(struct Qdisc *); + int (*requeue)(struct sk_buff *, struct Qdisc *); + int (*drop)(struct Qdisc *); + + int (*init)(struct Qdisc *, struct rtattr *arg); void (*reset)(struct Qdisc *); void (*destroy)(struct Qdisc *); - int (*init)(struct Qdisc *, void *arg); - int (*control)(struct Qdisc *, void *); - int (*requeue)(struct sk_buff *skb, struct Qdisc *); + + int (*dump)(struct Qdisc *, struct sk_buff *); }; struct Qdisc_head @@ -30,23 +73,35 @@ struct Qdisc_head h; int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); struct sk_buff * (*dequeue)(struct Qdisc *dev); + unsigned flags; +#define TCQ_F_DEFAULT 1 +#define TCQ_F_BUILTIN 2 struct Qdisc_ops *ops; - int handle; + struct Qdisc *next; + u32 handle; + u32 classid; struct Qdisc *parent; struct sk_buff_head q; struct device *dev; - unsigned long dropped; - unsigned long tx_last; + + struct tc_stats stats; unsigned long tx_timeo; + unsigned long tx_last; + int (*reshape_fail)(struct sk_buff *skb, struct Qdisc *q); char data[0]; }; +struct qdisc_rate_table +{ + struct tc_ratespec rate; + u32 data[256]; + struct qdisc_rate_table *next; + int refcnt; +}; -/* Yes, it is slow for [34]86, but we have no choice. - 10 msec resolution is appropriate only for bandwidth < 32Kbit/sec. - RULE: +/* Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth Normal IP packet size ~ 512byte, hence: @@ -57,22 +112,96 @@ 10msec resolution -> <50Kbit/sec. The result: [34]86 is not good choice for QoS router :-( + + The things are not so bad, because we may use artifical + clock evaluated by integration of network data flow + in the most critical places. + + Note: we do not use fastgettimeofday. + The reason is that, when it is not the same thing as + gettimeofday, it returns invalid timestamp, which is + not updated, when net_bh is active. 
+ + So, use PSCHED_CLOCK_SOURCE = PSCHED_CPU on alpha and pentiums + with rtdsc. And PSCHED_JIFFIES on all other architectures, including [34]86 + and pentiums without rtdsc. + You can use PSCHED_GETTIMEOFDAY on another architectures, + which have fast and precise clock source, but it is too expensive. */ -typedef struct timeval psched_time_t; +#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY -/* On 64bit architecures it would be clever to define: -typedef u64 psched_time_t; - and make all this boring arithmetics directly - */ +typedef struct timeval psched_time_t; +typedef long psched_tdiff_t; -#ifndef SCHEDULE_ONLY_LOW_BANDWIDTH #define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp)) +#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ)) + +#else /* PSCHED_CLOCK_SOURCE != PSCHED_GETTIMEOFDAY */ + +typedef u64 psched_time_t; +typedef long psched_tdiff_t; + +extern psched_time_t psched_time_base; + +#if PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES + +#define PSCHED_WATCHER + +extern unsigned long psched_time_mark; + +#if HZ == 100 +#define PSCHED_JSCALE 7 +#elif HZ == 1024 +#define PSCHED_JSCALE 10 #else -#define PSCHED_GET_TIME(stamp) ((stamp) = xtime) +#define PSCHED_JSCALE 0 #endif +#define PSCHED_GET_TIME(stamp) ((stamp) = psched_time_base + (((unsigned long)(jiffies-psched_time_mark))<<PSCHED_JSCALE)) +#define PSCHED_US2JIFFIE(delay) ((delay)>>PSCHED_JSCALE) + +#elif PSCHED_CLOCK_SOURCE == PSCHED_CPU + +extern psched_tdiff_t psched_clock_per_hz; +extern int psched_clock_scale; + +#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz) + +#if CPU == 586 || CPU == 686 + +#define PSCHED_GET_TIME(stamp) \ +({ u32 hi, lo; \ + __asm__ __volatile__ (".byte 0x0f,0x31" :"=a" (lo), "=d" (hi)); \ + (stamp) = ((((u64)hi)<<32) + lo)>>psched_clock_scale; \ +}) + +#elif defined (__alpha__) + +#define PSCHED_WATCHER + +extern u32 psched_time_mark; + +#define PSCHED_GET_TIME(stamp) \ +({ u32 __res; \ + __asm__ __volatile__ ("rpcc %0" : "r="(__res)); \ + if (__res <= psched_time_mark)
psched_time_base += 0x100000000UL; \ + psched_time_mark = __res; \ + (stamp) = (psched_time_base + __res)>>psched_clock_scale; \ +}) + +#else + +#error PSCHED_CLOCK_SOURCE=PSCHED_CPU is not supported on this arch. + +#endif /* ARCH */ + +#endif /* PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES */ + +#endif /* PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY */ + +#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY #define PSCHED_TDIFF(tv1, tv2) \ ({ \ int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ @@ -106,8 +235,6 @@ __delta; \ }) -#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ)) - #define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \ (tv1).tv_sec <= (tv2).tv_sec) || \ (tv1).tv_sec < (tv2).tv_sec) @@ -127,24 +254,86 @@ (tv).tv_usec -= 1000000; } \ }) -/* Set/check that undertime is in the "past perfect"; +/* Set/check that time is in the "past perfect"; it depends on concrete representation of system time */ #define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0) #define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0) +#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; }) + +#else + +#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2)) +#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \ +({ \ + long __delta = (tv1) - (tv2); \ + if ( __delta > (bound)) { __delta = (bound); guard; } \ + __delta; \ +}) + + +#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2)) +#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta)) +#define PSCHED_TADD(tv, delta) ((tv) += (delta)) +#define PSCHED_SET_PASTPERFECT(t) ((t) = 0) +#define PSCHED_IS_PASTPERFECT(t) ((t) == 0) +#define PSCHED_AUDIT_TDIFF(t) + +#endif + +struct tcf_police +{ + struct tcf_police *next; + int refcnt; + u32 index; + + int action; + u32 burst; + u32 mtu; + + u32 toks; + u32 ptoks; + psched_time_t t_c; + struct qdisc_rate_table *R_tab; + struct qdisc_rate_table *P_tab; +}; + +extern void tcf_police_destroy(struct tcf_police *p); +extern struct tcf_police * tcf_police_locate(struct 
rtattr *rta); +extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p); +extern int tcf_police(struct sk_buff *skb, struct tcf_police *p); + +extern __inline__ void tcf_police_release(struct tcf_police *p) +{ + if (p && --p->refcnt == 0) + tcf_police_destroy(p); +} extern struct Qdisc noop_qdisc; +extern struct Qdisc_ops noop_qdisc_ops; +extern struct Qdisc_ops pfifo_qdisc_ops; +extern struct Qdisc_ops bfifo_qdisc_ops; int register_qdisc(struct Qdisc_ops *qops); int unregister_qdisc(struct Qdisc_ops *qops); +struct Qdisc *qdisc_lookup(struct device *dev, u32 handle); +struct Qdisc *qdisc_lookup_class(struct device *dev, u32 handle); void dev_init_scheduler(struct device *dev); void dev_shutdown(struct device *dev); void dev_activate(struct device *dev); void dev_deactivate(struct device *dev); void qdisc_reset(struct Qdisc *qdisc); void qdisc_destroy(struct Qdisc *qdisc); +struct Qdisc * qdisc_create_dflt(struct device *dev, struct Qdisc_ops *ops); +struct Qdisc * dev_set_scheduler(struct device *dev, struct Qdisc *qdisc); +int qdisc_new_estimator(struct tc_stats *stats, struct rtattr *opt); +void qdisc_kill_estimator(struct tc_stats *stats); +struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab); +void qdisc_put_rtab(struct qdisc_rate_table *tab); +int teql_init(void); +int tc_filter_init(void); int pktsched_init(void); void qdisc_run_queues(void); @@ -159,6 +348,12 @@ qdisc_head.forw = &q->h; } } +} + +extern __inline__ unsigned psched_mtu(struct device *dev) +{ + unsigned mtu = dev->mtu; + return dev->hard_header ? 
mtu + dev->hard_header_len : mtu; } #endif diff -u --recursive --new-file v2.1.98/linux/include/net/snmp.h linux/include/net/snmp.h --- v2.1.98/linux/include/net/snmp.h Mon Apr 6 17:41:01 1998 +++ linux/include/net/snmp.h Tue Apr 28 11:10:10 1998 @@ -130,6 +130,7 @@ unsigned long SyncookiesSent; unsigned long SyncookiesRecv; unsigned long SyncookiesFailed; + unsigned long EmbryonicRsts; }; #endif diff -u --recursive --new-file v2.1.98/linux/include/net/sock.h linux/include/net/sock.h --- v2.1.98/linux/include/net/sock.h Tue Apr 14 14:29:26 1998 +++ linux/include/net/sock.h Thu Apr 30 12:52:29 1998 @@ -323,7 +323,7 @@ /* Define this to get the sk->debug debugging facility. */ #define SOCK_DEBUGGING #ifdef SOCK_DEBUGGING -#define SOCK_DEBUG(sk, msg...) if((sk) && ((sk)->debug)) printk(KERN_DEBUG ## msg) +#define SOCK_DEBUG(sk, msg...) do { if((sk) && ((sk)->debug)) printk(KERN_DEBUG ## msg); } while (0) #else #define SOCK_DEBUG(sk, msg...) do { } while (0) #endif diff -u --recursive --new-file v2.1.98/linux/include/net/tcp.h linux/include/net/tcp.h --- v2.1.98/linux/include/net/tcp.h Tue Apr 14 14:29:26 1998 +++ linux/include/net/tcp.h Thu Apr 30 12:54:04 1998 @@ -172,7 +172,8 @@ __u32 rcv_nxt; struct tcp_func *af_specific; struct tcp_bind_bucket *tb; - struct timer_list timer; + struct tcp_tw_bucket *next_death; + int death_slot; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct in6_addr v6_daddr; struct in6_addr v6_rcv_saddr; @@ -248,9 +249,11 @@ #define MAX_RESET_SIZE (NETHDR_SIZE + sizeof(struct tcphdr) + MAX_HEADER + 15) #define MAX_TCPHEADER_SIZE (NETHDR_SIZE + sizeof(struct tcphdr) + 20 + MAX_HEADER + 15) -#define MAX_WINDOW 32767 /* Never offer a window over 32767 without using - window scaling (not yet supported). Some poor - stacks do signed 16bit maths! */ +/* + * Never offer a window over 32767 without using window scaling. Some + * poor stacks do signed 16bit maths! 
+ */ +#define MAX_WINDOW 32767 #define MIN_WINDOW 2048 #define MAX_ACK_BACKLOG 2 #define MAX_DELAY_ACK 2 @@ -293,13 +296,17 @@ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ #define TCP_KEEPALIVE_PERIOD ((75*HZ)>>2) /* period of keepalive check */ -#define TCP_SYNACK_PERIOD (HZ/2) +#define TCP_SYNACK_PERIOD (HZ/2) /* How often to run the synack slow timer */ #define TCP_QUICK_TRIES 8 /* How often we try to retransmit, until - * we tell the LL layer that it is something + * we tell the link layer that it is something * wrong (e.g. that it can expire redirects) */ #define TCP_BUCKETGC_PERIOD (HZ) +/* TIME_WAIT reaping mechanism. */ +#define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ +#define TCP_TWKILL_PERIOD ((HZ*60)/TCP_TWKILL_SLOTS) + /* * TCP option */ @@ -564,6 +571,8 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mss); +/* tcp_output.c */ + extern void tcp_read_wakeup(struct sock *); extern void tcp_write_xmit(struct sock *); extern void tcp_time_wait(struct sock *); @@ -572,8 +581,6 @@ extern void tcp_xmit_retransmit_queue(struct sock *); extern void tcp_simple_retransmit(struct sock *); -/* tcp_output.c */ - extern void tcp_send_probe0(struct sock *); extern void tcp_send_partial(struct sock *); extern void tcp_write_wakeup(struct sock *); @@ -615,11 +622,38 @@ #define TCP_SLT_SYNACK 0 #define TCP_SLT_KEEPALIVE 1 -#define TCP_SLT_BUCKETGC 2 -#define TCP_SLT_MAX 3 +#define TCP_SLT_TWKILL 2 +#define TCP_SLT_BUCKETGC 3 +#define TCP_SLT_MAX 4 extern struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX]; +/* Compute the current effective MSS, taking SACKs and IP options, + * and even PMTU discovery events into account. 
+ */ +static __inline__ unsigned int tcp_current_mss(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct dst_entry *dst = sk->dst_cache; + unsigned int mss_now = sk->mss; + + if(dst && (sk->mtu < dst->pmtu)) { + unsigned int mss_distance = (sk->mtu - sk->mss); + + /* PMTU discovery event has occurred. */ + sk->mtu = dst->pmtu; + sk->mss = sk->mtu - mss_distance; + } + + if(tp->sack_ok && tp->num_sacks) + mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + + (tp->num_sacks * TCPOLEN_SACK_PERBLOCK)); + if(sk->opt) + mss_now -= sk->opt->optlen; + + return mss_now; +} + /* Compute the actual receive window we are currently advertising. */ static __inline__ u32 tcp_receive_window(struct tcp_opt *tp) { @@ -919,7 +953,7 @@ * our initial window offering to 32k. There should also * be a sysctl option to stop being nice. */ - (*rcv_wnd) = min(space,32767); + (*rcv_wnd) = min(space, MAX_WINDOW); (*rcv_wscale) = 0; if (wscale_ok) { /* See RFC1323 for an explanation of the limit to 14 */ diff -u --recursive --new-file v2.1.98/linux/include/scsi/scsi_ioctl.h linux/include/scsi/scsi_ioctl.h --- v2.1.98/linux/include/scsi/scsi_ioctl.h Sun Oct 13 23:07:16 1996 +++ linux/include/scsi/scsi_ioctl.h Sat Apr 25 22:15:22 1998 @@ -15,9 +15,6 @@ #ifdef __KERNEL__ -extern int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg); -extern int kernel_scsi_ioctl (Scsi_Device *dev, int cmd, void *arg); - /* * Structures used for scsi_ioctl et al. 
*/ @@ -32,6 +29,11 @@ __u32 dev_id; __u32 host_unique_id; } Scsi_Idlun; + +extern int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg); +extern int kernel_scsi_ioctl (Scsi_Device *dev, int cmd, void *arg); +extern int scsi_ioctl_send_command(Scsi_Device *dev, + Scsi_Ioctl_Command *arg); #endif diff -u --recursive --new-file v2.1.98/linux/init/main.c linux/init/main.c --- v2.1.98/linux/init/main.c Tue Apr 14 14:29:26 1998 +++ linux/init/main.c Sat Apr 25 22:13:39 1998 @@ -1162,13 +1162,6 @@ smp_begin(); #endif -#ifdef CONFIG_KMOD - { - extern int kmod_init(void); - kmod_init(); - } -#endif - #ifdef CONFIG_UMSDOS_FS { /* diff -u --recursive --new-file v2.1.98/linux/kernel/kmod.c linux/kernel/kmod.c --- v2.1.98/linux/kernel/kmod.c Sat Apr 25 18:13:12 1998 +++ linux/kernel/kmod.c Wed Apr 29 17:53:00 1998 @@ -1,6 +1,12 @@ /* kmod, the new module loader (replaces kerneld) Kirk Petersen + + Reorganized not to be a daemon by Adam Richter, with guidance + from Greg Zornetzer. + + Modified to avoid chroot and file sharing problems. + Mikael Pettersson */ #define __KERNEL_SYSCALLS__ @@ -8,147 +14,88 @@ #include #include #include +#include +#include /* - kmod_unload_delay and modprobe_path are set via /proc/sys. + modprobe_path is set via /proc/sys. 
*/ -int kmod_unload_delay = 60; char modprobe_path[256] = "/sbin/modprobe"; -static char module_name[64] = ""; -static char * argv[] = { modprobe_path, "-s", "-k", module_name, NULL }; static char * envp[] = { "HOME=/", "TERM=linux", "PATH=/usr/bin:/bin", NULL }; /* - kmod_queue synchronizes the kmod thread and the rest of the system - kmod_unload_timer is what we use to unload modules - after kmod_unload_delay seconds -*/ -static struct wait_queue * kmod_queue = NULL; -static struct timer_list kmod_unload_timer; + exec_modprobe is spawned from a kernel-mode user process, + then changes its state to behave _as_if_ it was spawned + from the kernel's init process + (ppid and {e,}gid are not adjusted, but that shouldn't + be a problem since we trust modprobe) +*/ +#define task_init task[smp_num_cpus] + +static inline void +use_init_file_context(void) { + lock_kernel(); + + /* don't use the user's root, use init's root instead */ + exit_fs(current); /* current->fs->count--; */ + current->fs = task_init->fs; + current->fs->count++; -/* - It is not easy to implement a full fork in kernel-space on some - systems (Alpha), and it is not necessary for us here. This is - a new thread that does the exec. -*/ -static int kmod_exec_modprobe(void * data) -{ - sigemptyset(&current->blocked); - execve(modprobe_path, argv, envp); - printk(KERN_ERR "kmod: failed to load module %s\n", module_name); - return 0; + unlock_kernel(); } -/* - kmod_thread is the thread that does most of the work. kmod_unload and - request_module tell it to wake up and do work. -*/ -static int kmod_thread(void * data) +static int exec_modprobe(void * module_name) { - int pid; - - /* - Initialize basic thread information - */ - current->session = 1; - current->pgrp = 1; - sprintf(current->comm, "kmod"); - sigfillset(&current->blocked); - - /* - This is the main kmod_thread loop. It first sleeps, then - handles requests from request_module or kmod_unload.
- */ - - while (1) { - interruptible_sleep_on(&kmod_queue); + char *argv[] = { modprobe_path, "-s", "-k", (char*)module_name, NULL}; + int i; - /* - If request_module woke us up, we should try to - load module_name. If not, kmod_unload woke us up, - do call delete_module. - (if somehow both want us to do something, ignore the - delete_module request) - */ - if (module_name[0] == '\0') { - delete_module(NULL); - } else { - pid = kernel_thread(kmod_exec_modprobe, NULL, SIGCHLD); - if (pid > 0) { - waitpid(pid, NULL, 0); - module_name[0] = '\0'; - wake_up(&kmod_queue); - } else { - printk(KERN_ERR "kmod: fork failed, errno %d\n", -pid); - } - } - } + use_init_file_context(); - return 0; /* Never reached. */ -} - -/* - kmod_unload is the function that the kernel calls when - the kmod_unload_timer expires -*/ -void kmod_unload(unsigned long x) -{ - /* - wake up the kmod thread, which does the work - (we can't call delete_module, as it locks the kernel and - we are in the bottom half of the kernel (right?)) - once it is awake, reset the timer + /* Prevent parent user process from sending signals to child. + Otherwise, if the modprobe program does not exist, it might + be possible to get a user defined signal handler to execute + as the super user right after the execve fails if you time + the signal just right. */ - wake_up(&kmod_queue); - kmod_unload_timer.expires = jiffies + (kmod_unload_delay * HZ); - add_timer(&kmod_unload_timer); -} - -int kmod_init(void) -{ - printk("Starting kmod\n"); + spin_lock_irq(&current->sigmask_lock); + flush_signals(current); + flush_signal_handlers(current); + spin_unlock_irq(&current->sigmask_lock); - /* - * CLONE_FS means that our "cwd" will follow that of init. - * CLONE_FILES just saves some space (we don't need any - * new file descriptors). Ditto for CLONE_SIGHAND.
- */ - kernel_thread(kmod_thread, NULL, CLONE_FILES | CLONE_FS | CLONE_SIGHAND); - - kmod_unload_timer.next = NULL; - kmod_unload_timer.prev = NULL; - kmod_unload_timer.expires = jiffies + (5 * 60 * HZ); - kmod_unload_timer.data = 0L; - kmod_unload_timer.function = kmod_unload; - add_timer(&kmod_unload_timer); + for (i = 0; i < current->files->max_fds; i++ ) { + if (current->files->fd[i]) close(i); + } + set_fs(KERNEL_DS); /* Allow execve args to be in kernel space. */ + current->uid = current->euid = current->fsuid = 0; + if (execve(modprobe_path, argv, envp) < 0) { + printk(KERN_ERR + "kmod: failed to exec %s -s -k %s, errno = %d\n", + modprobe_path, (char*) module_name, errno); + return -errno; + } return 0; } /* - request_module, the function that everyone calls when they need a - module to be loaded + request_module: the function that everyone calls when they need + a module. */ -int request_module(const char * name) +int request_module(const char * module_name) { - /* first, copy the name of the module into module_name */ - /* then wake_up() the kmod daemon */ - /* wait for the kmod daemon to finish (it will wake us up) */ - - /* - kmod_thread is sleeping, so start by copying the name of - the module into module_name. Once that is done, wake up - kmod_thread. - */ - strncpy(module_name, name, sizeof(module_name)); - module_name[sizeof(module_name)-1] = '\0'; - wake_up(&kmod_queue); - - /* - Now that we have told kmod_thread what to do, we want to - go to sleep and let it do its work. It will wake us up, - at which point we will be done (the module will be loaded). 
- */ - interruptible_sleep_on(&kmod_queue); + int pid; + int waitpid_result; + + pid = kernel_thread(exec_modprobe, (void*) module_name, + CLONE_FS | SIGCHLD); + if (pid < 0) { + printk(KERN_ERR "kmod: fork failed, errno %d\n", -pid); + return pid; + } + waitpid_result = waitpid(pid, NULL, 0); + if (waitpid_result != pid) { + printk (KERN_ERR "kmod: waitpid(%d,NULL,0) failed, returning %d.\n", + pid, waitpid_result); + } return 0; } diff -u --recursive --new-file v2.1.98/linux/kernel/sysctl.c linux/kernel/sysctl.c --- v2.1.98/linux/kernel/sysctl.c Wed Apr 1 20:11:55 1998 +++ linux/kernel/sysctl.c Sat Apr 25 22:13:40 1998 @@ -43,7 +43,6 @@ extern int sysctl_overcommit_memory; #ifdef CONFIG_KMOD extern char modprobe_path[]; -extern int kmod_unload_delay; #endif #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; @@ -180,8 +179,6 @@ #ifdef CONFIG_KMOD {KERN_MODPROBE, "modprobe", &modprobe_path, 256, 0644, NULL, &proc_dostring, &sysctl_string }, - {KERN_KMOD_UNLOAD_DELAY, "kmod_unload_delay", &kmod_unload_delay, - sizeof(int), 0644, NULL, &proc_dointvec}, #endif #ifdef CONFIG_CHR_DEV_SG {KERN_NRFILE, "sg-big-buff", &sg_big_buff, sizeof (int), diff -u --recursive --new-file v2.1.98/linux/mm/page_alloc.c linux/mm/page_alloc.c --- v2.1.98/linux/mm/page_alloc.c Sat Apr 25 18:13:12 1998 +++ linux/mm/page_alloc.c Tue Apr 28 14:18:12 1998 @@ -108,17 +108,6 @@ * but this had better return false if any reasonable "get_free_page()" * allocation could currently fail.. * - * Currently we approve of the following situations: - * - the highest memory order has two entries - * - the highest memory order has one free entry and: - * - the next-highest memory order has two free entries - * - the highest memory order has one free entry and: - * - the next-highest memory order has one free entry - * - the next-next-highest memory order has two free entries - * - * [previously, there had to be two entries of the highest memory - * order, but this lead to problems on large-memory machines.] 
- * * This will return zero if no list was found, non-zero * if there was memory (the bigger, the better). */ @@ -129,13 +118,14 @@ struct free_area_struct * list; /* - * If we have more than about 6% of all memory free, + * If we have more than about 3% to 5% of all memory free, * consider it to be good enough for anything. * It may not be, due to fragmentation, but we * don't want to keep on forever trying to find * free unfragmented memory. + * Added low/high water marks to avoid thrashing -- Rik. */ - if (nr_free_pages > num_physpages >> 4) + if (nr_free_pages > (num_physpages >> 5) + (nr ? 0 : num_physpages >> 6)) return nr+1; list = free_area + NR_MEM_LISTS; @@ -286,16 +276,17 @@ } } -repeat: - spin_lock_irqsave(&page_alloc_lock, flags); - RMQUEUE(order, maxorder, (gfp_mask & GFP_DMA)); - spin_unlock_irqrestore(&page_alloc_lock, flags); - if (gfp_mask & __GFP_WAIT) { - int freed = try_to_free_pages(gfp_mask,SWAP_CLUSTER_MAX); + for (;;) { + spin_lock_irqsave(&page_alloc_lock, flags); + RMQUEUE(order, maxorder, (gfp_mask & GFP_DMA)); + spin_unlock_irqrestore(&page_alloc_lock, flags); + if (!(gfp_mask & __GFP_WAIT)) + break; + shrink_dcache(); + if (!try_to_free_pages(gfp_mask, SWAP_CLUSTER_MAX)) + break; gfp_mask &= ~__GFP_WAIT; /* go through this only once */ maxorder = NR_MEM_LISTS; /* Allow anything this time */ - if (freed) - goto repeat; } nopage: return 0; diff -u --recursive --new-file v2.1.98/linux/mm/vmscan.c linux/mm/vmscan.c --- v2.1.98/linux/mm/vmscan.c Thu Mar 26 15:57:06 1998 +++ linux/mm/vmscan.c Tue Apr 28 14:19:02 1998 @@ -441,9 +441,6 @@ int i=6; int stop; - /* Let the dcache know we're looking for memory ... */ - shrink_dcache_memory(); - /* Always trim SLAB caches when memory gets low. 
*/ kmem_cache_reap(gfp_mask); @@ -458,17 +455,17 @@ switch (state) { do { case 0: + state = 1; if (shrink_mmap(i, gfp_mask)) return 1; - state = 1; case 1: + state = 2; if ((gfp_mask & __GFP_IO) && shm_swap(i, gfp_mask)) return 1; - state = 2; default: + state = 0; if (swap_out(i, gfp_mask)) return 1; - state = 0; i--; } while ((i - stop) >= 0); } @@ -547,29 +544,26 @@ run_task_queue(&tq_disk); schedule(); swapstats.wakeups++; + + /* This will gently shrink the dcache.. */ + shrink_dcache_memory(); /* * Do the background pageout: be * more aggressive if we're really * low on free memory. * - * Normally this is called 4 times - * a second if we need more memory, - * so this has a normal rate of - * X*4 pages of memory free'd per - * second. That rate goes up when - * - * - we're really low on memory (we get woken - * up a lot more) - * - other processes fail to allocate memory, - * at which time they try to do their own - * freeing. - * - * A "tries" value of 50 means up to 200 pages - * per second (1.6MB/s). This should be a /proc - * thing. + * The number of tries is 512 divided by an + * 'urgency factor'. In practice this will mean + * a value of 512 / 8 = 64 pages at a time, + * giving 64 * 4 (times/sec) * 4k (pagesize) = + * 1 MB/s in lowest-priority background + * paging. This number rises to 8 MB/s when the + * priority is highest (but then we'll be woken + * up more often and the rate will be even higher). + * -- Should make this sysctl tunable... 
*/ - tries = (50 << 2) >> free_memory_available(3); + tries = (512) >> free_memory_available(3); while (tries--) { int gfp_mask; @@ -622,7 +616,7 @@ if ((long) (now - want) >= 0) { if (want_wakeup || (num_physpages * buffer_mem.max_percent) < (buffermem >> PAGE_SHIFT) * 100 - || (num_physpages * page_cache.max_percent < page_cache_size)) { + || (num_physpages * page_cache.max_percent < page_cache_size * 100)) { /* Set the next wake-up time */ next_swap_jiffies = now + swapout_interval; wake_up(&kswapd_wait); diff -u --recursive --new-file v2.1.98/linux/net/Config.in linux/net/Config.in --- v2.1.98/linux/net/Config.in Mon Feb 23 18:12:12 1998 +++ linux/net/Config.in Tue Apr 28 11:10:10 1998 @@ -42,6 +42,11 @@ # if [ "$CONFIG_LLC" = "y" ]; then # bool 'Netbeui (EXPERIMENTAL)' CONFIG_NETBEUI # fi + tristate 'Acorn Econet/AUN protocols (EXPERIMENTAL)' CONFIG_ECONET + if [ "$CONFIG_ECONET" != "n" ]; then + bool ' AUN over UDP' CONFIG_ECONET_AUNUDP + bool ' Native Econet' CONFIG_ECONET_NATIVE + fi tristate 'WAN router' CONFIG_WAN_ROUTER bool 'Fast switching (read help!)' CONFIG_NET_FASTROUTE bool 'Forwarding between high speed interfaces' CONFIG_NET_HW_FLOWCONTROL diff -u --recursive --new-file v2.1.98/linux/net/bridge/Makefile linux/net/bridge/Makefile --- v2.1.98/linux/net/bridge/Makefile Mon Apr 7 11:35:32 1997 +++ linux/net/bridge/Makefile Tue Apr 28 11:10:10 1998 @@ -1,5 +1,5 @@ # -# Makefile for the Linux TCP/IP (INET) layer. +# Makefile for the Linux Bridge layer. # # Note! Dependencies are done automagically by 'make dep', which also # removes any old dependencies. 
DON'T put your own dependencies here diff -u --recursive --new-file v2.1.98/linux/net/core/Makefile linux/net/core/Makefile --- v2.1.98/linux/net/core/Makefile Tue Jan 13 20:15:33 1998 +++ linux/net/core/Makefile Tue Apr 28 11:10:10 1998 @@ -9,8 +9,7 @@ O_TARGET := core.o -O_OBJS := sock.o skbuff.o iovec.o datagram.o dst.o scm.o \ - neighbour.o rtnetlink.o utils.o +O_OBJS := sock.o skbuff.o iovec.o datagram.o scm.o ifeq ($(CONFIG_SYSCTL),y) O_OBJS += sysctl_net_core.o @@ -22,7 +21,7 @@ ifdef CONFIG_NET -O_OBJS += dev.o dev_mcast.o +O_OBJS += dev.o dev_mcast.o dst.o neighbour.o rtnetlink.o utils.o ifdef CONFIG_FIREWALL OX_OBJS += firewall.o diff -u --recursive --new-file v2.1.98/linux/net/core/dev.c linux/net/core/dev.c --- v2.1.98/linux/net/core/dev.c Mon Apr 6 17:41:01 1998 +++ linux/net/core/dev.c Tue Apr 28 15:00:46 1998 @@ -1789,7 +1789,9 @@ { struct device *dev, **dp; +#ifdef CONFIG_NET_SCHED pktsched_init(); +#endif /* * Initialise the packet receive queue. diff -u --recursive --new-file v2.1.98/linux/net/core/rtnetlink.c linux/net/core/rtnetlink.c --- v2.1.98/linux/net/core/rtnetlink.c Tue Mar 10 10:03:36 1998 +++ linux/net/core/rtnetlink.c Tue Apr 28 11:10:10 1998 @@ -63,6 +63,19 @@ rtnl_shunlock(); } +int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) +{ + memset(tb, 0, sizeof(struct rtattr*)*maxattr); + + while (RTA_OK(rta, len)) { + unsigned flavor = rta->rta_type; + if (flavor && flavor <= maxattr) + tb[flavor-1] = rta; + rta = RTA_NEXT(rta, len); + } + return 0; +} + #ifdef CONFIG_RTNETLINK struct sock *rtnl; @@ -109,6 +122,19 @@ memcpy(RTA_DATA(rta), data, attrlen); } +int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +{ + int err = 0; + + NETLINK_CB(skb).dst_groups = group; + if (echo) + atomic_inc(&skb->users); + netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); + if (echo) + err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); + return err; +} + #ifdef CONFIG_RTNL_OLD_IFINFO static int 
rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev, int type, pid_t pid, u32 seq) @@ -132,7 +158,7 @@ strncpy(r->ifi_name, dev->name, IFNAMSIZ-1); r->ifi_qdiscname[0] = 0; r->ifi_qdisc = dev->qdisc_sleeping->handle; - if (dev->qdisc_sleeping->ops) + if (dev->qdisc_sleeping) strcpy(r->ifi_qdiscname, dev->qdisc_sleeping->ops->id); if (dev->get_stats) { struct net_device_stats *stats = dev->get_stats(dev); @@ -175,7 +201,7 @@ } if (dev->ifindex != dev->iflink) RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink); - if (dev->qdisc_sleeping->ops) + if (dev->qdisc_sleeping) RTA_PUT(skb, IFLA_QDISC, strlen(dev->qdisc_sleeping->ops->id) + 1, dev->qdisc_sleeping->ops->id); diff -u --recursive --new-file v2.1.98/linux/net/core/sock.c linux/net/core/sock.c --- v2.1.98/linux/net/core/sock.c Tue Apr 14 14:29:26 1998 +++ linux/net/core/sock.c Tue Apr 28 11:10:10 1998 @@ -290,6 +290,7 @@ break; +#ifdef CONFIG_NETDEVICES case SO_BINDTODEVICE: /* Bind this socket to a particular device like "eth0", * as specified in an ifreq structure. If the device @@ -316,6 +317,7 @@ } } return 0; +#endif #ifdef CONFIG_FILTER diff -u --recursive --new-file v2.1.98/linux/net/econet/Makefile linux/net/econet/Makefile --- v2.1.98/linux/net/econet/Makefile Wed Dec 31 16:00:00 1969 +++ linux/net/econet/Makefile Tue Apr 28 11:10:11 1998 @@ -0,0 +1,23 @@ +# +# Makefile for Econet support code. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... 
+ +MOD_LIST_NAME := NET_MISC_MODULES + +O_OBJS := +M_OBJS := + +ifeq ($(CONFIG_ECONET),y) + O_OBJS += econet.o +else + ifeq ($(CONFIG_ECONET), m) + M_OBJS += econet.o + endif +endif + +include $(TOPDIR)/Rules.make diff -u --recursive --new-file v2.1.98/linux/net/econet/econet.c linux/net/econet/econet.c --- v2.1.98/linux/net/econet/econet.c Wed Dec 31 16:00:00 1969 +++ linux/net/econet/econet.c Tue Apr 28 11:10:11 1998 @@ -0,0 +1,1108 @@ +/* + * An implementation of the Acorn Econet and AUN protocols. + * Philip Blundell + * + * Fixes: + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct proto_ops econet_ops; +static struct sock *econet_sklist; + +#ifdef CONFIG_ECONET_AUNUDP +static struct socket *udpsock; +#define AUN_PORT 0x8000 + +struct aunhdr +{ + unsigned char code; /* AUN magic protocol byte */ + unsigned char port; + unsigned char cb; + unsigned char pad; + unsigned long handle; +}; + +static unsigned long aun_seq = 0; + +/* Queue of packets waiting to be transmitted. */ +static struct sk_buff_head aun_queue; +static struct timer_list ab_cleanup_timer; + +#endif /* CONFIG_ECONET_AUNUDP */ + +/* Per-packet information */ +struct ec_cb +{ + struct sockaddr_ec sec; + unsigned long cookie; /* Supplied by user. 
*/ +#ifdef CONFIG_ECONET_AUNUDP + int done; + unsigned long seq; /* Sequencing */ + unsigned long timeout; /* Timeout */ + unsigned long start; /* jiffies */ +#endif +#ifdef CONFIG_ECONET_NATIVE + void (*sent)(struct sk_buff *, int result); +#endif +}; + +struct ec_device +{ + struct device *dev; /* Real device structure */ + unsigned char station, net; /* Econet protocol address */ + struct ec_device *prev, *next; /* Linked list */ +}; + +static struct ec_device *edevlist = NULL; + +static spinlock_t edevlist_lock; + +/* + * Faster version of edev_get - call with IRQs off + */ + +static __inline__ struct ec_device *__edev_get(struct device *dev) +{ + struct ec_device *edev; + for (edev = edevlist; edev; edev = edev->next) + { + if (edev->dev == dev) + break; + } + return edev; +} + +/* + * Find an Econet device given its `dev' pointer. This is IRQ safe. + */ + +static struct ec_device *edev_get(struct device *dev) +{ + struct ec_device *edev; + unsigned long flags; + spin_lock_irqsave(&edevlist_lock, flags); + edev = __edev_get(dev); + spin_unlock_irqrestore(&edevlist_lock, flags); + return edev; +} + +/* + * Pull a packet from our receive queue and hand it to the user. + * If necessary we block. + */ + +static int econet_recvmsg(struct socket *sock, struct msghdr *msg, int len, + int flags, struct scm_cookie *scm) +{ + struct sock *sk = sock->sk; + struct sk_buff *skb; + int copied, err; + + msg->msg_namelen = sizeof(struct sockaddr_ec); + + /* + * Call the generic datagram receiver. This handles all sorts + * of horrible races and re-entrancy so we can forget about it + * in the protocol layers. + * + * Now it will return ENETDOWN, if device have just gone down, + * but then it will block. + */ + + skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err); + + /* + * An error occurred so return it. Because skb_recv_datagram() + * handles the blocking we don't see and worry about blocking + * retries. 
+ */ + + if(skb==NULL) + goto out; + + /* + * You lose any data beyond the buffer you gave. If it worries a + * user program they can ask the device for its MTU anyway. + */ + + copied = skb->len; + if (copied > len) + { + copied=len; + msg->msg_flags|=MSG_TRUNC; + } + + /* We can't use skb_copy_datagram here */ + err = memcpy_toiovec(msg->msg_iov, skb->data, copied); + if (err) + goto out_free; + sk->stamp=skb->stamp; + + if (msg->msg_name) + memcpy(msg->msg_name, skb->cb, msg->msg_namelen); + + /* + * Free or return the buffer as appropriate. Again this + * hides all the races and re-entrancy issues from us. + */ + err = copied; + +out_free: + skb_free_datagram(sk, skb); +out: + return err; +} + +/* + * Bind an Econet socket. + */ + +static int econet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + struct sockaddr_ec *sec = (struct sockaddr_ec *)uaddr; + struct sock *sk=sock->sk; + + /* + * Check legality + */ + + if (addr_len < sizeof(struct sockaddr_ec)) + return -EINVAL; + if (sec->sec_family != AF_ECONET) + return -EINVAL; + + sk->protinfo.af_econet->cb = sec->cb; + sk->protinfo.af_econet->port = sec->port; + sk->protinfo.af_econet->station = sec->addr.station; + sk->protinfo.af_econet->net = sec->addr.net; + + return 0; +} + +/* + * Queue a transmit result for the user to be told about. 
+ */
+
+static void tx_result(struct sock *sk, unsigned long cookie, int result)
+{
+	/* Zero-length skb: only the control block (skb->cb) carries data. */
+	struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
+	struct ec_cb *eb;
+	struct sockaddr_ec *sec;
+
+	if (skb == NULL)
+	{
+		printk(KERN_DEBUG "ec: memory squeeze, transmit result dropped.\n");
+		return;
+	}
+
+	eb = (struct ec_cb *)&skb->cb;
+	sec = (struct sockaddr_ec *)&eb->sec;
+	memset(sec, 0, sizeof(struct sockaddr_ec));
+	sec->cookie = cookie;
+	sec->type = ECTYPE_TRANSMIT_STATUS | result;
+	sec->sec_family = AF_ECONET;
+
+	/* If the receive queue refuses the report, drop it. */
+	if (sock_queue_rcv_skb(sk, skb) < 0)
+		kfree_skb(skb);
+}
+
+#ifdef CONFIG_ECONET_NATIVE
+/*
+ *	Called by the Econet hardware driver when a packet transmit
+ *	has completed.  Tell the user.
+ */
+
+static void ec_tx_done(struct sk_buff *skb, int result)
+{
+	struct ec_cb *eb = (struct ec_cb *)&skb->cb;
+	tx_result(skb->sk, eb->cookie, result);
+}
+#endif
+
+/*
+ *	Fill in the per-packet control block of an outgoing skb.
+ *
+ *	When the caller supplied a destination (saddr != NULL) it is copied
+ *	verbatim together with its cookie.  When no destination was supplied
+ *	the packet goes to the socket's bound address, so synthesise an
+ *	equivalent sockaddr_ec from the already resolved addr/port/cb and
+ *	use a zero cookie.  This avoids dereferencing a NULL msg_name.
+ */
+
+static __inline__ void ec_fill_cb(struct ec_cb *eb,
+				  const struct sockaddr_ec *saddr,
+				  const struct ec_addr *addr,
+				  unsigned char port, unsigned char cb)
+{
+	if (saddr != NULL) {
+		eb->cookie = saddr->cookie;
+		eb->sec = *saddr;
+		return;
+	}
+
+	eb->cookie = 0;
+	memset(&eb->sec, 0, sizeof(eb->sec));
+	eb->sec.sec_family = AF_ECONET;
+	eb->sec.port = port;
+	eb->sec.cb = cb;
+	eb->sec.addr.station = addr->station;
+	eb->sec.addr.net = addr->net;
+}
+
+/*
+ *	Send a packet.  We have to work out which device it's going out on
+ *	and hence whether to use real Econet or the UDP emulation.
+ *
+ *	Returns the number of bytes queued on the native path, the result
+ *	of sock_sendmsg() on the AUN path, or a negative errno
+ *	(-EINVAL for bad flags/address length, -ENETUNREACH when no
+ *	configured device serves the destination network, -EPROTOTYPE when
+ *	the required transport is not compiled in).
+ */
+
+static int econet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
+			  struct scm_cookie *scm)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_ec *saddr=(struct sockaddr_ec *)msg->msg_name;
+	struct device *dev;
+	struct ec_addr addr;
+	struct ec_device *edev;
+	int err;
+	unsigned char port, cb;
+	struct sk_buff *skb;
+	struct ec_cb *eb;
+#ifdef CONFIG_ECONET_NATIVE
+	unsigned short proto = 0;
+#endif
+#ifdef CONFIG_ECONET_AUNUDP
+	struct msghdr udpmsg;
+	struct iovec iov[msg->msg_iovlen+1];
+	struct aunhdr ah;
+	struct sockaddr_in udpdest;
+	__kernel_size_t size;
+	int i;
+	mm_segment_t oldfs;
+#endif
+
+	/*
+	 *	Check the flags.
+	 */
+
+	if (msg->msg_flags&~MSG_DONTWAIT)
+		return(-EINVAL);
+
+	/*
+	 *	Get and verify the address.  With no msg_name we fall back
+	 *	to the address the socket was bound to.
+	 */
+
+	if (saddr == NULL) {
+		addr.station = sk->protinfo.af_econet->station;
+		addr.net = sk->protinfo.af_econet->net;
+		port = sk->protinfo.af_econet->port;
+		cb = sk->protinfo.af_econet->cb;
+	} else {
+		if (msg->msg_namelen < sizeof(struct sockaddr_ec))
+			return -EINVAL;
+		addr.station = saddr->addr.station;
+		addr.net = saddr->addr.net;
+		port = saddr->port;
+		cb = saddr->cb;
+	}
+
+	/* Look for a device with the right network number. */
+	for (edev = edevlist; edev && (edev->net != addr.net);
+	     edev = edev->next);
+
+	/* Bridge?  What's that? */
+	if (edev == NULL)
+		return -ENETUNREACH;
+
+	dev = edev->dev;
+
+	if (dev->type == ARPHRD_ECONET)
+	{
+		/* Real hardware Econet.  We're not worthy etc. */
+#ifdef CONFIG_ECONET_NATIVE
+		dev_lock_list();
+
+		skb = sock_alloc_send_skb(sk, len+dev->hard_header_len+15, 0,
+					  msg->msg_flags & MSG_DONTWAIT, &err);
+		if (skb==NULL)
+			goto out_unlock;
+
+		skb_reserve(skb, (dev->hard_header_len+15)&~15);
+		skb->nh.raw = skb->data;
+
+		eb = (struct ec_cb *)&skb->cb;
+
+		ec_fill_cb(eb, saddr, &addr, port, cb);
+		/* Was "eb->sent - ec_tx_done;": a no-op that left the
+		   completion callback unset. */
+		eb->sent = ec_tx_done;
+
+		if (dev->hard_header) {
+			int res;
+			err = -EINVAL;
+			res = dev->hard_header(skb, dev, ntohs(proto), &addr, NULL, len);
+			if (sock->type != SOCK_DGRAM) {
+				skb->tail = skb->data;
+				skb->len = 0;
+			} else if (res < 0)
+				goto out_free;
+		}
+
+		/* Copy the data. Returns -EFAULT on error */
+		err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
+		skb->protocol = proto;
+		skb->dev = dev;
+		skb->priority = sk->priority;
+		if (err)
+			goto out_free;
+
+		err = -ENETDOWN;
+		if (!(dev->flags & IFF_UP))
+			goto out_free;
+
+		/*
+		 *	Now send it
+		 */
+
+		dev_unlock_list();
+		dev_queue_xmit(skb);
+		return(len);
+
+	out_free:
+		kfree_skb(skb);
+	out_unlock:
+		dev_unlock_list();
+#else
+		err = -EPROTOTYPE;
+#endif
+		return err;
+	}
+
+#ifdef CONFIG_ECONET_AUNUDP
+	/* AUN virtual Econet. */
+
+	if (udpsock == NULL)
+		return -ENETDOWN;		/* No socket - can't send */
+
+	/* Make up a UDP datagram and hand it off to some higher intellect. */
+
+	memset(&udpdest, 0, sizeof(udpdest));
+	udpdest.sin_family = AF_INET;
+	udpdest.sin_port = htons(AUN_PORT);
+
+	/* At the moment we use the stupid Acorn scheme of Econet address
+	   y.x maps to IP a.b.c.x.  This should be replaced with something
+	   more flexible and more aware of subnet masks.  */
+	{
+		/* NOTE(review): assumes the device has an in_device with at
+		   least one address configured - confirm against callers. */
+		struct in_device *idev = (struct in_device *)dev->ip_ptr;
+		unsigned long network = ntohl(idev->ifa_list->ifa_address) &
+			0xffffff00;		/* !!! */
+		udpdest.sin_addr.s_addr = htonl(network | addr.station);
+	}
+
+	ah.port = port;
+	ah.cb = cb & 0x7f;
+	ah.code = 2;		/* magic */
+	ah.pad = 0;
+
+	/* tack our header on the front of the iovec */
+	size = sizeof(struct aunhdr);
+	iov[0].iov_base = (void *)&ah;
+	iov[0].iov_len = size;
+	for (i = 0; i < msg->msg_iovlen; i++) {
+		void *base = msg->msg_iov[i].iov_base;
+		size_t len = msg->msg_iov[i].iov_len;
+		/* Check it now since we switch to KERNEL_DS later. */
+		if ((err = verify_area(VERIFY_READ, base, len)) < 0)
+			return err;
+		iov[i+1].iov_base = base;
+		iov[i+1].iov_len = len;
+		size += len;
+	}
+
+	/* Get a skbuff (no data, just holds our cb information) */
+	if ((skb = sock_alloc_send_skb(sk, 0, 0,
+			     msg->msg_flags & MSG_DONTWAIT, &err)) == NULL)
+		return err;
+
+	eb = (struct ec_cb *)&skb->cb;
+
+	ec_fill_cb(eb, saddr, &addr, port, cb);
+	eb->timeout = (5*HZ);
+	eb->start = jiffies;
+	ah.handle = aun_seq;
+	eb->seq = (aun_seq++);
+
+	/* Park the skb on the pending queue until it is acked or times out. */
+	skb_queue_tail(&aun_queue, skb);
+
+	udpmsg.msg_name = (void *)&udpdest;
+	udpmsg.msg_namelen = sizeof(udpdest);
+	udpmsg.msg_iov = &iov[0];
+	udpmsg.msg_iovlen = msg->msg_iovlen + 1;
+	udpmsg.msg_control = NULL;
+	udpmsg.msg_controllen = 0;
+	udpmsg.msg_flags=0;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);	/* More privs :-) */
+	err = sock_sendmsg(udpsock, &udpmsg, size);
+	set_fs(oldfs);
+#else
+	err = -EPROTOTYPE;
+#endif
+	return err;
+}
+
+/*
+ *	Look up the address of a socket.  Only the local (bound) address
+ *	is available; asking for the peer yields -EOPNOTSUPP.
+ */
+
+static int econet_getname(struct socket *sock, struct sockaddr *uaddr,
+			  int *uaddr_len, int peer)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_ec *sec = (struct sockaddr_ec *)uaddr;
+
+	if (peer)
+		return -EOPNOTSUPP;
+
+	sec->sec_family = AF_ECONET;
+	sec->port = sk->protinfo.af_econet->port;
+	sec->addr.station = sk->protinfo.af_econet->station;
+	sec->addr.net = sk->protinfo.af_econet->net;
+
+	*uaddr_len = sizeof(*sec);
+	return 0;
+}
+
+/*
+ *	Timer callback armed by econet_release(): free the sock once no
+ *	read or write memory is still charged to it, otherwise look again
+ *	in ten seconds.
+ */
+
+static void econet_destroy_timer(unsigned long data)
+{
+	struct sock *sk=(struct sock *)data;
+
+	if (!atomic_read(&sk->wmem_alloc) && !atomic_read(&sk->rmem_alloc)) {
+		sk_free(sk);
+		MOD_DEC_USE_COUNT;
+		return;
+	}
+
+	sk->timer.expires=jiffies+10*HZ;
+	add_timer(&sk->timer);
+	printk(KERN_DEBUG "econet socket destroy delayed\n");
+}
+
+/*
+ *	Close an econet socket.
+ */
+
+static int econet_release(struct socket *sock, struct socket *peersock)
+{
+	struct sk_buff *skb;
+	struct sock *sk = sock->sk;
+
+	if (!sk)
+		return 0;
+
+	sklist_remove_socket(&econet_sklist, sk);
+
+	/*
+	 *	Now the socket is dead. No more input will appear.
+	 */
+
+	sk->state_change(sk);	/* It is useless. Just for sanity. */
+
+	sock->sk = NULL;
+	sk->socket = NULL;
+	sk->dead = 1;
+
+	/* Purge queues */
+
+	while ((skb=skb_dequeue(&sk->receive_queue))!=NULL)
+		kfree_skb(skb);
+
+	/* Buffers still charged to the socket: let the timer finish the job. */
+	if (atomic_read(&sk->rmem_alloc) || atomic_read(&sk->wmem_alloc)) {
+		sk->timer.data=(unsigned long)sk;
+		sk->timer.expires=jiffies+HZ;
+		sk->timer.function=econet_destroy_timer;
+		add_timer(&sk->timer);
+		return 0;
+	}
+
+	/* NOTE(review): sk->protinfo.af_econet is kmalloc'ed in
+	   econet_create() and not freed here - confirm whether sk_free()
+	   releases it. */
+	sk_free(sk);
+	MOD_DEC_USE_COUNT;
+	return 0;
+}
+
+/*
+ *	Create an Econet socket.  Only SOCK_DGRAM is supported; returns 0
+ *	or -ESOCKTNOSUPPORT / -ENOBUFS.  The module use count is held for
+ *	the lifetime of the sock.
+ */
+
+static int econet_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	int err;
+
+	/* Econet only provides datagram services. */
+	if (sock->type != SOCK_DGRAM)
+		return -ESOCKTNOSUPPORT;
+
+	sock->state = SS_UNCONNECTED;
+	MOD_INC_USE_COUNT;
+
+	err = -ENOBUFS;
+	sk = sk_alloc(AF_ECONET, GFP_KERNEL, 1);
+	if (sk == NULL)
+		goto out;
+
+	sk->reuse = 1;
+	sock->ops = &econet_ops;
+	sock_init_data(sock,sk);
+
+	sk->protinfo.af_econet = kmalloc(sizeof(struct econet_opt), GFP_KERNEL);
+	if (sk->protinfo.af_econet == NULL)
+		goto out_free;
+	memset(sk->protinfo.af_econet, 0, sizeof(struct econet_opt));
+	sk->zapped=0;
+	sk->family = AF_ECONET;
+	sk->num = protocol;
+
+	sklist_insert_socket(&econet_sklist, sk);
+	return(0);
+
+out_free:
+	sk_free(sk);
+out:
+	MOD_DEC_USE_COUNT;
+	return err;
+}
+
+/*
+ *	Handle Econet specific ioctls: set (SIOCSIFADDR) or read back
+ *	(SIOCGIFADDR) the Econet station/net number attached to a device.
+ *	arg points at a user-space struct ifreq.
+ */
+
+static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void *arg)
+{
+	struct ifreq ifr;
+	struct ec_device *edev;
+	struct device *dev;
+	unsigned long flags;
+	struct sockaddr_ec *sec;
+
+	/*
+	 *	Fetch the caller's info block into kernel space
+	 */
+
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		return -EFAULT;
+
+	if ((dev = dev_get(ifr.ifr_name)) == NULL)
+		return -ENODEV;
+
+	sec = (struct sockaddr_ec *)&ifr.ifr_addr;
+
+	switch (cmd)
+	{
+	case SIOCSIFADDR:
+		spin_lock_irqsave(&edevlist_lock, flags);
+		edev = __edev_get(dev);
+		if (edev == NULL)
+		{
+			/* Magic up a new one.  kmalloc() takes (size, flags);
+			   the arguments used to be swapped.  We hold a
+			   spinlock with interrupts off here, so the
+			   allocation must not sleep. */
+			edev = kmalloc(sizeof(struct ec_device), GFP_ATOMIC);
+			if (edev == NULL) {
+				printk("af_ec: memory squeeze.\n");
+				spin_unlock_irqrestore(&edevlist_lock, flags);
+				return -ENOMEM;
+			}
+			memset(edev, 0, sizeof(struct ec_device));
+			edev->dev = dev;
+			/* Push on the head of the doubly linked list; keep
+			   the old head's back pointer valid because the
+			   unregister notifier unlinks through ->prev. */
+			edev->next = edevlist;
+			if (edevlist)
+				edevlist->prev = edev;
+			edevlist = edev;
+		}
+		edev->station = sec->addr.station;
+		edev->net = sec->addr.net;
+		spin_unlock_irqrestore(&edevlist_lock, flags);
+		return 0;
+
+	case SIOCGIFADDR:
+		spin_lock_irqsave(&edevlist_lock, flags);
+		edev = __edev_get(dev);
+		if (edev == NULL)
+		{
+			spin_unlock_irqrestore(&edevlist_lock, flags);
+			return -ENODEV;
+		}
+		memset(sec, 0, sizeof(struct sockaddr_ec));
+		sec->addr.station = edev->station;
+		sec->addr.net = edev->net;
+		sec->sec_family = AF_ECONET;
+		spin_unlock_irqrestore(&edevlist_lock, flags);
+		if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+			return -EFAULT;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ *	Handle generic ioctls.  Ownership and timestamp requests are
+ *	answered here, Econet address requests go to ec_dev_ioctl() and
+ *	the remaining interface requests are passed to dev_ioctl().
+ */
+
+static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	int err;
+	int pid;
+
+	switch(cmd)
+	{
+		case FIOSETOWN:
+		case SIOCSPGRP:
+			err = get_user(pid, (int *) arg);
+			if (err)
+				return err;
+			if (current->pid != pid && current->pgrp != -pid && !suser())
+				return -EPERM;
+			sk->proc = pid;
+			return(0);
+		case FIOGETOWN:
+		case SIOCGPGRP:
+			return put_user(sk->proc, (int *)arg);
+		case SIOCGSTAMP:
+			if(sk->stamp.tv_sec==0)
+				return -ENOENT;
+			err = -EFAULT;
+			if (!copy_to_user((void *)arg, &sk->stamp, sizeof(struct timeval)))
+				err = 0;
+			return err;
+		case SIOCGIFFLAGS:
+		case SIOCSIFFLAGS:
+		case SIOCGIFCONF:
+		case SIOCGIFMETRIC:
+		case SIOCSIFMETRIC:
+		case SIOCGIFMEM:
+		case SIOCSIFMEM:
+		case SIOCGIFMTU:
+		case SIOCSIFMTU:
+		case SIOCSIFLINK:
+		case SIOCGIFHWADDR:
+		case SIOCSIFHWADDR:
+		case SIOCSIFMAP:
+		case SIOCGIFMAP:
+		case SIOCSIFSLAVE:
+		case SIOCGIFSLAVE:
+		case SIOCGIFINDEX:
+		case SIOCGIFNAME:
+		case SIOCGIFCOUNT:
+		case SIOCSIFHWBROADCAST:
+			return(dev_ioctl(cmd,(void *) arg));
+
+
+		case SIOCSIFADDR:
+		case SIOCGIFADDR:
+			return ec_dev_ioctl(sock, cmd, (void *)arg);
+
+		default:
+			if ((cmd >= SIOCDEVPRIVATE) &&
+			    (cmd <= (SIOCDEVPRIVATE + 15)))
+				return(dev_ioctl(cmd,(void *) arg));
+
+#ifdef CONFIG_NET_RADIO
+			if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
+				return(dev_ioctl(cmd,(void *) arg));
+#endif
+			return -EOPNOTSUPP;
+	}
+	/*NOTREACHED*/
+	return 0;
+}
+
+static struct net_proto_family econet_family_ops = {
+	AF_ECONET,
+	econet_create
+};
+
+static struct proto_ops econet_ops = {
+	AF_ECONET,
+
+	sock_no_dup,
+	econet_release,
+	econet_bind,
+	sock_no_connect,
+	NULL,
+	NULL,
+	econet_getname,
+	datagram_poll,
+	econet_ioctl,
+	sock_no_listen,
+	sock_no_shutdown,
+	sock_no_setsockopt,
+	sock_no_getsockopt,
+	sock_no_fcntl,
+	econet_sendmsg,
+	econet_recvmsg
+};
+
+/*
+ *	Find the listening socket, if any, for the given data.  A zero
+ *	port, station or net in the socket's bound address acts as a
+ *	wildcard; the first match on econet_sklist wins.
+ */
+
+static struct sock *ec_listening_socket(unsigned char port, unsigned char
+				 station, unsigned char net)
+{
+	struct sock *sk = econet_sklist;
+
+	while (sk)
+	{
+		struct econet_opt *opt = sk->protinfo.af_econet;
+		if ((opt->port == port || opt->port == 0) &&
+		    (opt->station == station || opt->station == 0) &&
+		    (opt->net == net || opt->net == 0))
+			return sk;
+		sk = sk->sklist_next;
+	}
+
+	return NULL;
+}
+
+#ifdef CONFIG_ECONET_AUNUDP
+
+/*
+ *	Send an AUN protocol response.
+ */
+
+static void aun_send_response(__u32 addr, unsigned long seq, int code, int cb)
+{
+	struct sockaddr_in sin;
+	struct iovec iov;
+	struct aunhdr ah;
+	struct msghdr udpmsg;
+	int err;
+	mm_segment_t oldfs;
+
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(AUN_PORT);
+	sin.sin_addr.s_addr = addr;
+
+	/* The reply is a bare AUN header echoing the sender's handle. */
+	ah.code = code;
+	ah.pad = 0;
+	ah.port = 0;
+	ah.cb = cb;
+	ah.handle = seq;
+
+	iov.iov_base = (void *)&ah;
+	iov.iov_len = sizeof(ah);
+
+	udpmsg.msg_name = (void *)&sin;
+	udpmsg.msg_namelen = sizeof(sin);
+	udpmsg.msg_iov = &iov;
+	udpmsg.msg_iovlen = 1;
+	udpmsg.msg_control = NULL;
+	udpmsg.msg_controllen = 0;
+	udpmsg.msg_flags=0;
+
+	/* The iovec points at kernel memory, so lift the user-space limit
+	   for the duration of the send.  The result is deliberately
+	   ignored: responses are best effort. */
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	err = sock_sendmsg(udpsock, &udpmsg, sizeof(ah));
+	set_fs(oldfs);
+}
+
+/*
+ *	Handle incoming AUN packets.  Work out if anybody wants them,
+ *	and send positive or negative acknowledgements as appropriate.
+ *
+ *	ah points at the AUN header inside skb and len is the number of
+ *	bytes available from there (header included).
+ */
+
+static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
+{
+	struct ec_device *edev = edev_get(skb->dev);
+	struct iphdr *ip = skb->nh.iph;
+	unsigned char stn = ntohl(ip->saddr) & 0xff;
+	struct sock *sk;
+	struct sk_buff *newskb;
+	struct ec_cb *eb;
+	struct sockaddr_ec *sec;
+
+	/* A datagram shorter than the AUN header cannot be parsed, and
+	   "len - sizeof(struct aunhdr)" below would wrap around. */
+	if (len < sizeof(struct aunhdr))
+		return;
+
+	if (edev == NULL)
+		return;		/* Device not configured for AUN */
+
+	if ((sk = ec_listening_socket(ah->port, stn, edev->net)) == NULL)
+		goto bad;		/* Nobody wants it */
+
+	newskb = alloc_skb((len - sizeof(struct aunhdr) + 15) & ~15,
+			   GFP_ATOMIC);
+	if (newskb == NULL)
+	{
+		printk(KERN_DEBUG "AUN: memory squeeze, dropping packet.\n");
+		/* Send nack and hope sender tries again */
+		goto bad;
+	}
+
+	/* Describe the sender in the control block for recvmsg(). */
+	eb = (struct ec_cb *)&newskb->cb;
+	sec = (struct sockaddr_ec *)&eb->sec;
+	memset(sec, 0, sizeof(struct sockaddr_ec));
+	sec->sec_family = AF_ECONET;
+	sec->type = ECTYPE_PACKET_RECEIVED;
+	sec->port = ah->port;
+	sec->cb = ah->cb;
+	sec->addr.net = edev->net;
+	sec->addr.station = stn;
+
+	/* Payload is everything after the AUN header. */
+	memcpy(skb_put(newskb, len - sizeof(struct aunhdr)), (void *)(ah+1),
+	       len - sizeof(struct aunhdr));
+
+	if (sock_queue_rcv_skb(sk, newskb) < 0)
+	{
+		/* Socket is bankrupt. */
+		kfree_skb(newskb);
+		goto bad;
+	}
+
+	aun_send_response(ip->saddr, ah->handle, 3, 0);
+	return;
+
+bad:
+	aun_send_response(ip->saddr, ah->handle, 4, 0);
+}
+
+/*
+ *	Protects walks of aun_queue.  Declared here (a tentative definition,
+ *	compatible with the one further down the file) because
+ *	aun_tx_ack() uses the lock before that later definition is seen.
+ */
+
+static spinlock_t aun_queue_lock;
+
+/*
+ *	Handle incoming AUN transmit acknowledgements.  If the sequence
+ *	number matches something in our backlog then kill it and tell
+ *	the user.  If the remote took too long to reply then we may have
+ *	dropped the packet already.
+ */
+
+static void aun_tx_ack(unsigned long seq, int result)
+{
+	struct sk_buff *skb;
+	unsigned long flags;
+	struct ec_cb *eb;
+
+	spin_lock_irqsave(&aun_queue_lock, flags);
+	skb = skb_peek(&aun_queue);
+	while (skb && skb != (struct sk_buff *)&aun_queue)
+	{
+		struct sk_buff *newskb = skb->next;
+		eb = (struct ec_cb *)&skb->cb;
+		if (eb->seq == seq)
+			goto foundit;
+
+		skb = newskb;
+	}
+	spin_unlock_irqrestore(&aun_queue_lock, flags);
+	/* seq is unsigned long, so print it as such. */
+	printk(KERN_DEBUG "AUN: unknown sequence %lu\n", seq);
+	return;
+
+foundit:
+	tx_result(skb->sk, eb->cookie, result);
+	skb_unlink(skb);
+	spin_unlock_irqrestore(&aun_queue_lock, flags);
+	/* The placeholder skb has served its purpose; without this it
+	   is never freed once it has left the queue. */
+	kfree_skb(skb);
+}
+
+/*
+ *	Deal with received AUN frames - sort out what type of thing it is
+ *	and hand it to the right function.
+ */
+
+static void aun_data_available(struct sock *sk, int slen)
+{
+	int err;
+	struct sk_buff *skb;
+	unsigned char *data;
+	struct aunhdr *ah;
+	struct iphdr *ip;
+	size_t len;
+
+	/* Non-blocking receive; any error other than "queue empty" is
+	   logged and the receive is retried. */
+	while ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) {
+		if (err == -EAGAIN) {
+			printk(KERN_ERR "AUN: no data available?!");
+			return;
+		}
+		printk(KERN_DEBUG "AUN: recvfrom() error %d\n", -err);
+	}
+
+	/* Drop runt datagrams: the code below reads a full AUN header
+	   from behind the UDP header, and aun_incoming() subtracts the
+	   header size from the length. */
+	if (skb->len < sizeof(struct udphdr) + sizeof(struct aunhdr))
+		goto out;
+
+	data = skb->h.raw + sizeof(struct udphdr);
+	ah = (struct aunhdr *)data;
+	len = skb->len - sizeof(struct udphdr);
+	ip = skb->nh.iph;
+
+	/* Dispatch on the AUN code byte: 2 = data, 3 = ack, 4 = nack. */
+	switch (ah->code)
+	{
+	case 2:
+		aun_incoming(skb, ah, len);
+		break;
+	case 3:
+		aun_tx_ack(ah->handle, ECTYPE_TRANSMIT_OK);
+		break;
+	case 4:
+		aun_tx_ack(ah->handle, ECTYPE_TRANSMIT_NOT_LISTENING);
+		break;
+#if 0
+		/* This isn't quite right yet. */
+	case 5:
+		aun_send_response(ip->saddr, ah->handle, 6, ah->cb);
+		break;
+#endif
+	default:
+		printk(KERN_DEBUG "unknown AUN packet (type %d)\n", data[0]);
+	}
+
+out:
+	skb_free_datagram(sk, skb);
+}
+
+/*
+ *	Called by the timer to manage the AUN transmit queue.  If a packet
+ *	was sent to a dead or nonexistent host then we will never get an
+ *	acknowledgement back.  After a few seconds we need to spot this and
+ *	drop the packet.
+ */
+
+static spinlock_t aun_queue_lock;
+
+static void ab_cleanup(unsigned long h)
+{
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	spin_lock_irqsave(&aun_queue_lock, flags);
+	skb = skb_peek(&aun_queue);
+	while (skb && skb != (struct sk_buff *)&aun_queue)
+	{
+		/* Remember the successor first: skb may leave the queue. */
+		struct sk_buff *newskb = skb->next;
+		struct ec_cb *eb = (struct ec_cb *)&skb->cb;
+		if ((jiffies - eb->start) > eb->timeout)
+		{
+			tx_result(skb->sk, eb->cookie,
+				  ECTYPE_TRANSMIT_NOT_PRESENT);
+			skb_unlink(skb);
+			/* Unlinking alone never frees the placeholder
+			   skb, so release it here. */
+			kfree_skb(skb);
+		}
+		skb = newskb;
+	}
+	spin_unlock_irqrestore(&aun_queue_lock, flags);
+
+	/* Re-arm: the queue is scanned every two seconds. */
+	mod_timer(&ab_cleanup_timer, jiffies + (HZ*2));
+}
+
+/*
+ *	Set up the AUN-over-UDP transport: the pending-ack queue, its
+ *	cleanup timer and a kernel UDP socket bound to AUN_PORT whose
+ *	data_ready hook feeds aun_data_available().  Returns 0 or the
+ *	negative error from sock_create()/bind().
+ */
+
+__initfunc(static int aun_udp_initialise(void))
+{
+	int error;
+	struct sockaddr_in sin;
+
+	skb_queue_head_init(&aun_queue);
+	spin_lock_init(&aun_queue_lock);
+	init_timer(&ab_cleanup_timer);
+	ab_cleanup_timer.expires = jiffies + (HZ*2);
+	ab_cleanup_timer.function = ab_cleanup;
+	add_timer(&ab_cleanup_timer);
+
+	memset(&sin, 0, sizeof(sin));
+	/* Name the family explicitly, as the other sockaddr_in users in
+	   this file do. */
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(AUN_PORT);
+
+	/* We can count ourselves lucky Acorn machines are too dim to
+	   speak IPv6. :-) */
+	if ((error = sock_create(AF_INET, SOCK_DGRAM, 0, &udpsock)) < 0)
+	{
+		printk("AUN: socket error %d\n", -error);
+		return error;
+	}
+
+	udpsock->sk->reuse = 1;
+	udpsock->sk->allocation = GFP_ATOMIC;	/* we're going to call it
+						   from interrupts */
+
+	error = udpsock->ops->bind(udpsock, (struct sockaddr *)&sin,
+				sizeof(sin));
+	if (error < 0)
+	{
+		printk("AUN: bind error %d\n", -error);
+		goto release;
+	}
+
+	udpsock->sk->data_ready = aun_data_available;
+
+	return 0;
+
+release:
+	sock_release(udpsock);
+	udpsock = NULL;
+	return error;
+}
+#endif
+
+/*
+ *	Netdevice notifier: forget the Econet address of a device that is
+ *	being unregistered.  Always returns NOTIFY_DONE.
+ */
+
+static int econet_notifier(struct notifier_block *this, unsigned long msg, void *data)
+{
+	struct device *dev = (struct device *)data;
+	struct ec_device *edev;
+	struct ec_device **pp;
+	unsigned long flags;
+
+	switch (msg) {
+	case NETDEV_UNREGISTER:
+		/* A device has gone down - kill any data we hold for it. */
+		spin_lock_irqsave(&edevlist_lock, flags);
+		/* Unlink through the pointer that refers to the entry, so
+		   the removal does not depend on ->prev having been
+		   maintained when the entry was inserted. */
+		for (pp = &edevlist; (edev = *pp) != NULL; pp = &edev->next)
+		{
+			if (edev->dev == dev)
+			{
+				*pp = edev->next;
+				if (edev->next)
+					edev->next->prev = edev->prev;
+				kfree(edev);
+				break;
+			}
+		}
+		spin_unlock_irqrestore(&edevlist_lock, flags);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+struct notifier_block econet_netdev_notifier={
+	econet_notifier,
+	NULL,
+	0
+};
+
+#ifdef MODULE
+void cleanup_module(void)
+{
+#ifdef CONFIG_ECONET_AUNUDP
+	del_timer(&ab_cleanup_timer);
+	if (udpsock)
+		sock_release(udpsock);
+#endif
+	unregister_netdevice_notifier(&econet_netdev_notifier);
+	sock_unregister(econet_family_ops.family);
+	return;
+}
+
+int init_module(void)
+#else
+__initfunc(void econet_proto_init(struct net_proto *pro))
+#endif
+{
+	spin_lock_init(&edevlist_lock);
+	/* aun_queue_lock only exists when the AUN transport is built, so
+	   every reference to it has to be conditional as well. */
+#ifdef CONFIG_ECONET_AUNUDP
+	spin_lock_init(&aun_queue_lock);
+#endif
+	/* Stop warnings from happening on UP systems. */
+	(void)edevlist_lock;
+#ifdef CONFIG_ECONET_AUNUDP
+	(void)aun_queue_lock;
+#endif
+	sock_register(&econet_family_ops);
+#ifdef CONFIG_ECONET_AUNUDP
+	aun_udp_initialise();
+#endif
+	register_netdevice_notifier(&econet_netdev_notifier);
+#ifdef MODULE
+	return 0;
+#endif
+}
diff -u --recursive --new-file v2.1.98/linux/net/ethernet/Makefile linux/net/ethernet/Makefile
--- v2.1.98/linux/net/ethernet/Makefile	Mon Apr  7 11:35:32 1997
+++ linux/net/ethernet/Makefile	Tue Apr 28 11:10:11 1998
@@ -1,5 +1,5 @@
 #
-# Makefile for the Linux TCP/IP (INET) layer.
+# Makefile for the Linux Ethernet layer.
 #
 # Note! Dependencies are done automagically by 'make dep', which also
 # removes any old dependencies. DON'T put your own dependencies here
diff -u --recursive --new-file v2.1.98/linux/net/ipv4/af_inet.c linux/net/ipv4/af_inet.c
--- v2.1.98/linux/net/ipv4/af_inet.c	Mon Apr  6 17:41:01 1998
+++ linux/net/ipv4/af_inet.c	Tue Apr 28 11:10:11 1998
@@ -5,7 +5,7 @@
  *
  *		AF_INET protocol family socket handler.
* - * Version: $Id: af_inet.c,v 1.69 1998/04/03 09:49:42 freitag Exp $ + * Version: $Id: af_inet.c,v 1.71 1998/04/16 05:38:16 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -1052,6 +1052,8 @@ #endif /* CONFIG_PROC_FS */ extern void tcp_init(void); +extern void tcp_v4_init(struct net_proto_family *); + /* * Called by socket.c on kernel startup. @@ -1101,8 +1103,11 @@ ip_init(); + tcp_v4_init(&inet_family_ops); + /* Setup TCP slab cache for open requests. */ tcp_init(); + /* * Set the ICMP layer up diff -u --recursive --new-file v2.1.98/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c --- v2.1.98/linux/net/ipv4/fib_rules.c Thu Mar 26 15:57:13 1998 +++ linux/net/ipv4/fib_rules.c Tue Apr 28 11:10:11 1998 @@ -5,7 +5,7 @@ * * IPv4 Forwarding Information Base: policy rules. * - * Version: $Id: fib_rules.c,v 1.4 1998/03/21 07:27:58 davem Exp $ + * Version: $Id: fib_rules.c,v 1.5 1998/04/28 06:21:57 davem Exp $ * * Authors: Alexey Kuznetsov, * @@ -65,6 +65,9 @@ u8 r_flags; u8 r_tos; int r_ifindex; +#ifdef CONFIG_NET_CLS_ROUTE + __u32 r_tclassid; +#endif char r_ifname[IFNAMSIZ]; }; @@ -165,6 +168,10 @@ if (dev) new_r->r_ifindex = dev->ifindex; } +#ifdef CONFIG_NET_CLS_ROUTE + if (rta[RTA_FLOW-1]) + memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4); +#endif rp = &fib_rules; if (!new_r->r_preference) { @@ -213,6 +220,16 @@ return saddr; } +#ifdef CONFIG_NET_CLS_ROUTE +u32 fib_rules_tclass(struct fib_result *res) +{ + if (res->r) + return res->r->r_tclassid; + return 0; +} +#endif + + static void fib_rules_detach(struct device *dev) { struct fib_rule *r; @@ -246,7 +263,7 @@ for (r = fib_rules; r; r=r->r_next) { if (((saddr^r->r_src) & r->r_srcmask) || ((daddr^r->r_dst) & r->r_dstmask) || -#ifdef CONFIG_IP_TOS_ROUTING +#ifdef CONFIG_IP_ROUTE_TOS (r->r_tos && r->r_tos != key->tos) || #endif (r->r_ifindex && r->r_ifindex != key->iif)) @@ -339,6 +356,10 @@ RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); if (r->r_srcmap) RTA_PUT(skb, RTA_GATEWAY, 4, 
&r->r_srcmap); +#ifdef CONFIG_NET_CLS_ROUTE + if (r->r_tclassid) + RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid); +#endif nlh->nlmsg_len = skb->tail - b; return skb->len; diff -u --recursive --new-file v2.1.98/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c --- v2.1.98/linux/net/ipv4/fib_semantics.c Tue Mar 17 22:18:16 1998 +++ linux/net/ipv4/fib_semantics.c Tue Apr 28 11:10:11 1998 @@ -5,7 +5,7 @@ * * IPv4 Forwarding Information Base: semantics. * - * Version: $Id: fib_semantics.c,v 1.7 1998/03/08 05:56:18 davem Exp $ + * Version: $Id: fib_semantics.c,v 1.8 1998/04/28 06:21:58 davem Exp $ * * Authors: Alexey Kuznetsov, * @@ -124,6 +124,9 @@ #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight != onh->nh_weight || #endif +#ifdef CONFIG_NET_CLS_ROUTE + nh->nh_tclassid != onh->nh_tclassid || +#endif ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) return -1; onh++; @@ -217,8 +220,12 @@ nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; nh->nh_oif = nhp->rtnh_ifindex; nh->nh_weight = nhp->rtnh_hops + 1; - if (attrlen) + if (attrlen) { nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); +#ifdef CONFIG_NET_CLS_ROUTE + nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); +#endif + } nhp = RTNH_NEXT(nhp); } endfor_nexthops(fi); return 0; @@ -267,6 +274,11 @@ gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); if (gw && gw != nh->nh_gw) return 1; +#ifdef CONFIG_NET_CLS_ROUTE + gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); + if (gw && gw != nh->nh_tclassid) + return 1; +#endif } nhp = RTNH_NEXT(nhp); } endfor_nexthops(fi); @@ -459,6 +471,10 @@ goto err_inval; if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) goto err_inval; +#ifdef CONFIG_NET_CLS_ROUTE + if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) + goto err_inval; +#endif #else goto err_inval; #endif @@ -468,6 +484,10 @@ nh->nh_oif = *rta->rta_oif; if (rta->rta_gw) memcpy(&nh->nh_gw, rta->rta_gw, 4); +#ifdef CONFIG_NET_CLS_ROUTE + 
if (rta->rta_flow) + memcpy(&nh->nh_tclassid, rta->rta_flow, 4); +#endif nh->nh_flags = r->rtm_flags; #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight = 1; @@ -654,6 +674,10 @@ if (fi->fib_rtt) RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &fi->fib_rtt); #else +#ifdef CONFIG_NET_CLS_ROUTE + if (fi->fib_nh[0].nh_tclassid) + RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid); +#endif if (fi->fib_mtu || fi->fib_window || fi->fib_rtt) { int i; struct rtattr *mx = (struct rtattr *)skb->tail; diff -u --recursive --new-file v2.1.98/linux/net/ipv4/icmp.c linux/net/ipv4/icmp.c --- v2.1.98/linux/net/ipv4/icmp.c Tue Apr 14 14:29:26 1998 +++ linux/net/ipv4/icmp.c Thu Apr 30 09:37:23 1998 @@ -3,7 +3,7 @@ * * Alan Cox, * - * Version: $Id: icmp.c,v 1.40 1998/04/11 09:38:24 freitag Exp $ + * Version: $Id: icmp.c,v 1.41 1998/04/29 22:12:10 alan Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -680,7 +680,7 @@ if (inet_addr_type(iph->daddr) == RTN_BROADCAST) { if (net_ratelimit()) - printk("%s sent an invalid ICMP error to a broadcast.\n", + printk(KERN_WARNING "%s sent an invalid ICMP error to a broadcast.\n", in_ntoa(skb->nh.iph->saddr)); return; } @@ -856,6 +856,9 @@ * All these rules are so bizarre, that I removed kernel addrmask * support at all. It is wrong, it is obsolete, nobody uses it in * any case. --ANK + * + * Furthermore you can do it with a usermode address agent program + * anyway... */ static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len) diff -u --recursive --new-file v2.1.98/linux/net/ipv4/ip_output.c linux/net/ipv4/ip_output.c --- v2.1.98/linux/net/ipv4/ip_output.c Wed Apr 1 20:11:55 1998 +++ linux/net/ipv4/ip_output.c Tue Apr 28 11:10:11 1998 @@ -5,7 +5,7 @@ * * The Internet Protocol (IP) output module. 
* - * Version: $Id: ip_output.c,v 1.51 1998/03/28 00:55:34 davem Exp $ + * Version: $Id: ip_output.c,v 1.56 1998/04/17 02:36:46 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -31,6 +31,10 @@ * Andi Kleen: Fix broken PMTU recovery and remove * some redundant tests. * Vitaly E. Lavrov : Transparent proxy revived after year coma. + * Andi Kleen : Replace ip_reply with ip_send_reply. + * Andi Kleen : Split fast and slow ip_build_xmit path + * for decreased register pressure on x86 + * and more readibility. */ #include @@ -70,7 +74,6 @@ #include #include #include -#include /* * Shall we try to damage output packets if routing dev changes? @@ -88,6 +91,9 @@ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } +/* + * Add an ip header to a skbuff and send it out. + */ void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr, struct ip_options *opt) { @@ -303,16 +309,6 @@ if (call_out_firewall(PF_INET, dev, iph, NULL, &skb) < FW_ACCEPT) goto drop; -#ifdef CONFIG_NET_SECURITY - /* Add an IP checksum (must do this before SECurity because - * of possible tunneling). - */ - ip_send_check(iph); - if (call_out_firewall(PF_SECURITY, NULL, NULL, (void *) 4, &skb) < FW_ACCEPT) - goto drop; - iph = skb->nh.iph; - /* Don't update tot_len, as the dev->mtu is already decreased. */ -#endif /* This can happen when the transport layer has segments queued * with a cached route, and by the time we get here things are * re-routed to a device with a different MTU than the original @@ -335,10 +331,9 @@ if (tot_len > rt->u.dst.pmtu) goto fragment; -#ifndef CONFIG_NET_SECURITY /* Add an IP checksum. */ ip_send_check(iph); -#endif + skb->priority = sk->priority; skb->dst->output(skb); return; @@ -382,7 +377,7 @@ * length to be copied. 
*/ -int ip_build_xmit(struct sock *sk, +int ip_build_xmit_slow(struct sock *sk, int getfrag (const void *, char *, unsigned int, @@ -397,91 +392,16 @@ int err; int offset, mf; unsigned short id; - struct iphdr *iph; + int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15; int nfrags=0; struct ip_options *opt = ipc->opt; int df = htons(IP_DF); -#ifdef CONFIG_NET_SECURITY - int fw_res; -#endif if (sk->ip_pmtudisc == IP_PMTUDISC_DONT || - (rt->u.dst.mxlock&(1<u.dst.mxlock&(1<ip_hdrincl) - length += sizeof(struct iphdr); - - if (length <= rt->u.dst.pmtu && opt == NULL) { - int error; - struct sk_buff *skb=sock_alloc_send_skb(sk, length+hh_len+15, - 0, flags&MSG_DONTWAIT, &error); - if(skb==NULL) { - ip_statistics.IpOutDiscards++; - return error; - } - - skb->priority = sk->priority; - skb->dst = dst_clone(&rt->u.dst); - skb_reserve(skb, hh_len); - - skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length); - - dev_lock_list(); - - if(!sk->ip_hdrincl) { - iph->version=4; - iph->ihl=5; - iph->tos=sk->ip_tos; - iph->tot_len = htons(length); - iph->id=htons(ip_id_count++); - iph->frag_off = df; - iph->ttl=sk->ip_mc_ttl; - if (rt->rt_type != RTN_MULTICAST) - iph->ttl=sk->ip_ttl; - iph->protocol=sk->protocol; - iph->saddr=rt->rt_src; - iph->daddr=rt->rt_dst; - iph->check=0; - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); - err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4); - } - else - err = getfrag(frag, (void *)iph, 0, length); - dev_unlock_list(); - - if (err) - err = -EFAULT; - - if(!err && call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb) < FW_ACCEPT) - err = -EPERM; -#ifdef CONFIG_NET_SECURITY - if ((fw_res=call_out_firewall(PF_SECURITY, NULL, NULL, (void *) 5, &skb))u.dst.output(skb); - } - + if (!sk->ip_hdrincl) length -= sizeof(struct iphdr); @@ -497,7 +417,7 @@ */ maxfraglen = ((rt->u.dst.pmtu-sizeof(struct iphdr)) & ~7) + fragheaderlen; - } + } if (length + fragheaderlen > 0xFFFF) return -EMSGSIZE; @@ -551,9 +471,9 @@ */ do 
{ - struct sk_buff * skb; int error; char *data; + struct sk_buff * skb; /* * Get the memory we require with some space left for alignment. @@ -581,13 +501,15 @@ */ data = skb_put(skb, fraglen); - skb->nh.iph = iph = (struct iphdr *)data; + skb->nh.iph = (struct iphdr *)data; /* * Only write IP header onto non-raw packets */ if(!sk->ip_hdrincl) { + struct iphdr *iph = (struct iphdr *)data; + iph->version = 4; iph->ihl = 5; if (opt) { @@ -622,49 +544,148 @@ * User data callback */ - err = getfrag(frag, data, offset, fraglen-fragheaderlen); - if (err) + err = 0; + if (getfrag(frag, data, offset, fraglen-fragheaderlen)) err = -EFAULT; /* * Account for the fragment. */ - - if(!err && !offset && call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb) < FW_ACCEPT) + + if(!err && offset == 0 && + call_out_firewall(PF_INET, rt->u.dst.dev, skb->nh.iph, NULL, &skb) < FW_ACCEPT) err = -EPERM; -#ifdef CONFIG_NET_SECURITY - if ((fw_res=call_out_firewall(PF_SECURITY, NULL, NULL, (void *) 6, &skb))u.dst.output(skb)) { - if (nfrags>1) - ip_statistics.IpFragCreates += nfrags; - dev_unlock_list(); - return -ENETDOWN; + err = -ENETDOWN; + ip_statistics.IpOutDiscards++; + break; } } while (offset >= 0); if (nfrags>1) ip_statistics.IpFragCreates += nfrags; + dev_unlock_list(); + return err; +} + + +/* + * Fast path for unfragmented packets. + */ +int ip_build_xmit(struct sock *sk, + int getfrag (const void *, + char *, + unsigned int, + unsigned int), + const void *frag, + unsigned length, + struct ipcm_cookie *ipc, + struct rtable *rt, + int flags) +{ + int err; + struct sk_buff *skb; + int df; + struct iphdr *iph; + + /* + * Try the simple case first. This leaves fragmented frames, and by + * choice RAW frames within 20 bytes of maximum size(rare) to the long path + */ + + if (!sk->ip_hdrincl) + length += sizeof(struct iphdr); + + /* + * Check for slow path. 
+ */ + if (length > rt->u.dst.pmtu || ipc->opt != NULL) + return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags); + + /* + * Do path mtu discovery if needed. + */ + df = htons(IP_DF); + if (sk->ip_pmtudisc == IP_PMTUDISC_DONT || + (rt->u.dst.mxlock&(1<u.dst.dev->hard_header_len + 15)&~15; + + skb = sock_alloc_send_skb(sk, length+hh_len+15, + 0, flags&MSG_DONTWAIT, &err); + if(skb==NULL) + goto error; + skb_reserve(skb, hh_len); + } + + skb->priority = sk->priority; + skb->dst = dst_clone(&rt->u.dst); + + skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length); + + dev_lock_list(); + + if(!sk->ip_hdrincl) { + iph->version=4; + iph->ihl=5; + iph->tos=sk->ip_tos; + iph->tot_len = htons(length); + iph->id=htons(ip_id_count++); + iph->frag_off = df; + iph->ttl=sk->ip_mc_ttl; + if (rt->rt_type != RTN_MULTICAST) + iph->ttl=sk->ip_ttl; + iph->protocol=sk->protocol; + iph->saddr=rt->rt_src; + iph->daddr=rt->rt_dst; + iph->check=0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4); + } + else + err = getfrag(frag, (void *)iph, 0, length); dev_unlock_list(); - return 0; + + if (err) + err = -EFAULT; + + if(!err && call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb) < FW_ACCEPT) + err = -EPERM; + + if (err) { + kfree_skb(skb); + goto error; + } + + return rt->u.dst.output(skb); + +error: + ip_statistics.IpOutDiscards++; + return err; } + + /* * This IP datagram is too large to be sent in one piece. Break it up into @@ -682,7 +703,7 @@ unsigned char *ptr; struct device *dev; struct sk_buff *skb2; - int left, mtu, hlen, len; + unsigned int mtu, hlen, left, len; int offset; int not_last_frag; u16 dont_fragment; @@ -712,11 +733,8 @@ * in this case we were fortunate it didn't happen */ - if (mtu<8) { - ip_statistics.IpFragFails++; - kfree_skb(skb); - return; - } + if (mtu<8) + goto fail; /* * Fragment the datagram. 
@@ -745,8 +763,7 @@ /* IF: we are not sending upto and including the packet end then align the next start on an eight byte boundary */ if (len < left) { - len/=8; - len*=8; + len &= ~7; } /* * Allocate buffer. @@ -754,9 +771,7 @@ if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) { NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n")); - ip_statistics.IpFragFails++; - kfree_skb(skb); - return; + goto fail; } /* @@ -829,61 +844,96 @@ } kfree_skb(skb); ip_statistics.IpFragOKs++; + return; + +fail: + kfree_skb(skb); + ip_statistics.IpFragFails++; } -struct sk_buff * ip_reply(struct sk_buff *skb, int payload) +/* + * Fetch data from kernel space and fill in checksum if needed. + */ +static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset, + unsigned int fraglen) +{ + struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr; + u16 *pktp = (u16 *)to; + struct iovec *iov; + int len; + int hdrflag = 1; + +#if 0 + printk("ip_reply_glue_bits: offset=%u,flen=%u iov[0].l=%u,iov[1].len=%u\n", + offset,fraglen,dp->iov[0].iov_len,dp->iov[1].iov_len); +#endif + + iov = &dp->iov[0]; + if (offset >= iov->iov_len) { + offset -= iov->iov_len; + iov++; + hdrflag = 0; + } + len = iov->iov_len - offset; + if (fraglen > len) { /* overlapping. */ +#if 1 + if (iov > &dp->iov[0]) { + printk("frag too long! (o=%u,fl=%u)\n",offset,fraglen); + return -1; + } +#endif + dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len, + dp->csum); + offset = 0; + fraglen -= len; + to += len; + iov++; + } + + dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen, + dp->csum); + + if (hdrflag && dp->csumoffset) + *(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */ + return 0; +} + +/* + * Generic function to send a packet as reply to another packet. + * Used to send TCP resets so far. ICMP should use this function too. 
+ * + * Should run single threaded per socket because it uses the sock + * structure to pass arguments. + */ +void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, + unsigned int len) { struct { struct ip_options opt; char data[40]; } replyopts; - - struct rtable *rt = (struct rtable*)skb->dst; - struct sk_buff *reply; - int iphlen; - struct iphdr *iph; - struct ipcm_cookie ipc; u32 daddr; - + struct rtable *rt = (struct rtable*)skb->dst; + if (ip_options_echo(&replyopts.opt, skb)) - return NULL; + return; + + sk->ip_tos = skb->nh.iph->tos; + sk->priority = skb->priority; + sk->protocol = skb->nh.iph->protocol; daddr = ipc.addr = rt->rt_src; ipc.opt = &replyopts.opt; + if (ipc.opt->srr) daddr = replyopts.opt.faddr; - if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0)) - return NULL; - - iphlen = sizeof(struct iphdr) + replyopts.opt.optlen; - reply = alloc_skb(rt->u.dst.dev->hard_header_len+15+iphlen+payload, GFP_ATOMIC); - if (reply == NULL) { - ip_rt_put(rt); - return NULL; - } - - reply->priority = skb->priority; - reply->dst = &rt->u.dst; - skb_reserve(reply, (rt->u.dst.dev->hard_header_len+15)&~15); - - /* Now build the IP header. */ - reply->nh.iph = iph = (struct iphdr *)skb_put(reply, iphlen); - - iph->version = 4; - iph->ihl = iphlen>>2; - iph->tos = skb->nh.iph->tos; - iph->frag_off = 0; - iph->ttl = MAXTTL; - iph->daddr = rt->rt_dst; - iph->saddr = rt->rt_src; - iph->protocol = skb->nh.iph->protocol; - iph->id = htons(ip_id_count++); - - ip_options_build(reply, &replyopts.opt, daddr, rt, 0); + return; - return reply; + /* And let IP do all the hard work. 
*/ + ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT); + ip_rt_put(rt); } /* diff -u --recursive --new-file v2.1.98/linux/net/ipv4/ipmr.c linux/net/ipv4/ipmr.c --- v2.1.98/linux/net/ipv4/ipmr.c Tue Mar 17 22:18:16 1998 +++ linux/net/ipv4/ipmr.c Tue Apr 28 11:10:11 1998 @@ -9,7 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: ipmr.c,v 1.33 1998/03/08 20:52:37 davem Exp $ + * Version: $Id: ipmr.c,v 1.34 1998/04/28 06:21:59 davem Exp $ * * Fixes: * Michael Chastain : Incorrect size of copying. @@ -703,8 +703,7 @@ mrtsock_destruct(sk); return -EADDRINUSE; case MRT_DONE: - mrtsock_destruct(sk); - return 0; + return ip_ra_control(sk, 0, NULL); case MRT_ADD_VIF: case MRT_DEL_VIF: if(optlen!=sizeof(vif)) diff -u --recursive --new-file v2.1.98/linux/net/ipv4/proc.c linux/net/ipv4/proc.c --- v2.1.98/linux/net/ipv4/proc.c Mon Apr 6 17:41:01 1998 +++ linux/net/ipv4/proc.c Tue Apr 28 11:10:11 1998 @@ -7,7 +7,7 @@ * PROC file system. It is mainly used for debugging and * statistics. * - * Version: $Id: proc.c,v 1.28 1998/04/03 09:49:45 freitag Exp $ + * Version: $Id: proc.c,v 1.30 1998/04/16 16:29:05 freitag Exp $ * * Authors: Fred N. van Kempen, * Gerald J. 
Heim, @@ -99,12 +99,19 @@ destp = ntohs(destp); srcp = ntohs(srcp); if((format == 0) && (sp->state == TCP_TIME_WAIT)) { + extern int tcp_tw_death_row_slot; struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sp; + int slot_dist; tw_bucket = 1; timer_active1 = timer_active2 = 0; timer_active = 3; - timer_expires = tw->timer.expires; + slot_dist = tw->death_slot; + if(slot_dist > tcp_tw_death_row_slot) + slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot; + else + slot_dist = tcp_tw_death_row_slot - slot_dist; + timer_expires = jiffies + (slot_dist * TCP_TWKILL_PERIOD); } else { timer_active1 = del_timer(&tp->retransmit_timer); timer_active2 = del_timer(&sp->timer); @@ -349,11 +356,13 @@ int len; len = sprintf(buffer, - "TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed\n" - "TcpExt: %lu %lu %lu\n", + "TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed" + "EmbryonicRsts\n" + "TcpExt: %lu %lu %lu %lu\n", net_statistics.SyncookiesSent, net_statistics.SyncookiesRecv, - net_statistics.SyncookiesFailed); + net_statistics.SyncookiesFailed, + net_statistics.EmbryonicRsts); if (offset >= len) { diff -u --recursive --new-file v2.1.98/linux/net/ipv4/route.c linux/net/ipv4/route.c --- v2.1.98/linux/net/ipv4/route.c Thu Mar 26 15:57:13 1998 +++ linux/net/ipv4/route.c Tue Apr 28 11:10:11 1998 @@ -5,7 +5,7 @@ * * ROUTE - implementation of the IP router. * - * Version: $Id: route.c,v 1.42 1998/03/20 09:12:09 davem Exp $ + * Version: $Id: route.c,v 1.47 1998/04/28 06:22:01 davem Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -577,7 +577,7 @@ if (rt != NULL) { if (dst->obsolete || rt->rt_flags&RTCF_REDIRECTED) { #if RT_CACHE_DEBUG >= 1 - printk(KERN_DEBUG "ip_rt_advice: redirect to %08x/%02x dropped\n", rt->rt_dst, rt->key.tos); + printk(KERN_DEBUG "ip_rt_advice: redirect to %d.%d.%d.%d/%02x dropped\n", NIPQUAD(rt->rt_dst), rt->key.tos); #endif ip_rt_put(rt); rt_cache_flush(0); @@ -725,11 +725,11 @@ mtu = guess_mtu(old_mtu); } - if (mtu < rth->u.dst.pmtu) { - /* New mtu received -> path was valid */ - dst_confirm(&rth->u.dst); - - rth->u.dst.pmtu = mtu; + if (mtu <= rth->u.dst.pmtu) { + if (mtu < rth->u.dst.pmtu) { + dst_confirm(&rth->u.dst); + rth->u.dst.pmtu = mtu; + } est_mtu = mtu; } } @@ -808,11 +808,18 @@ #endif rt->u.dst.window= fi->fib_window ? : 0; rt->u.dst.rtt = fi->fib_rtt ? : TCP_TIMEOUT_INIT; +#ifdef CONFIG_NET_CLS_ROUTE + rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; +#endif } else { rt->u.dst.pmtu = rt->u.dst.dev->mtu; rt->u.dst.window= 0; rt->u.dst.rtt = TCP_TIMEOUT_INIT; } +#ifdef CONFIG_NET_CLS_ROUTE + if (rt->u.dst.tclassid == 0) + rt->u.dst.tclassid = fib_rules_tclass(res); +#endif rt->rt_type = res->type; } @@ -1205,6 +1212,9 @@ key.oif = oif; key.scope = (tos&RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; res.fi = NULL; +#ifdef CONFIG_IP_MULTIPLE_TABLES + res.r = NULL; +#endif if (saddr) { if (MULTICAST(saddr) || BADCLASS(saddr) || ZERONET(saddr)) diff -u --recursive --new-file v2.1.98/linux/net/ipv4/tcp.c linux/net/ipv4/tcp.c --- v2.1.98/linux/net/ipv4/tcp.c Tue Apr 14 14:29:26 1998 +++ linux/net/ipv4/tcp.c Tue Apr 28 11:10:11 1998 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.111 1998/04/06 16:09:05 davem Exp $ + * Version: $Id: tcp.c,v 1.114 1998/04/26 01:11:33 davem Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -702,7 +702,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int mss_now = sk->mss; + int mss_now; int err = 0; int copied = 0; @@ -715,14 +715,7 @@ if((err = wait_for_tcp_connect(sk, flags)) != 0) return err; - /* The socket is locked, nothing can change the state of pending - * SACKs or IP options. - */ - if(tp->sack_ok && tp->num_sacks) - mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + - (tp->num_sacks * TCPOLEN_SACK_PERBLOCK)); - if(sk->opt && sk->opt->optlen) - mss_now -= (sk->opt->optlen); + mss_now = tcp_current_mss(sk); /* Ok commence sending. */ while(--iovlen >= 0) { @@ -842,6 +835,11 @@ goto do_interrupted; } wait_for_tcp_memory(sk); + + /* If SACK's were formed or PMTU events happened, + * we must find out about it. + */ + mss_now = tcp_current_mss(sk); continue; } @@ -908,10 +906,8 @@ /* If we're closed, don't send an ack, or we'll get a RST * from the closed destination. */ - if ((1 << sk->state) & (TCPF_CLOSE|TCPF_TIME_WAIT)) - return; - - tcp_send_ack(sk); + if (sk->state != TCP_CLOSE) + tcp_send_ack(sk); } /* @@ -1402,7 +1398,12 @@ return; } - sk->keepopen = 1; + /* It is questionable, what the role of this is now. + * In any event either it should be removed, or + * increment of SLT_KEEPALIVE be done, this is causing + * big problems. For now I comment it out. -DaveM + */ + /* sk->keepopen = 1; */ sk->shutdown = SHUTDOWN_MASK; if (!sk->dead) diff -u --recursive --new-file v2.1.98/linux/net/ipv4/tcp_input.c linux/net/ipv4/tcp_input.c --- v2.1.98/linux/net/ipv4/tcp_input.c Tue Apr 14 14:29:26 1998 +++ linux/net/ipv4/tcp_input.c Tue Apr 28 11:10:11 1998 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.106 1998/04/10 23:56:19 davem Exp $ + * Version: $Id: tcp_input.c,v 1.114 1998/04/28 06:42:22 davem Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -47,6 +47,9 @@ * Andrey Savochkin: Check sequence numbers correctly when * removing SACKs due to in sequence incoming * data segments. + * Andi Kleen: Make sure we never ack data there is not + * enough room for. Also make this condition + * a fatal error if it might still happen. */ #include @@ -76,6 +79,8 @@ int sysctl_tcp_stdurg; int sysctl_tcp_rfc1337; +static int prune_queue(struct sock *sk); + /* There is something which you must keep in mind when you analyze the * behavior of the tp->ato delayed ack timeout interval. When a * connection starts up, we want to ack as quickly as possible. The @@ -343,6 +348,13 @@ if (!no_fancy && sysctl_tcp_window_scaling) { tp->wscale_ok = 1; tp->snd_wscale = *(__u8 *)ptr; + if(tp->snd_wscale > 14) { + if(net_ratelimit()) + printk("tcp_parse_options: Illegal window " + "scaling value %d >14 received.", + tp->snd_wscale); + tp->snd_wscale = 14; + } } break; case TCPOPT_TIMESTAMP: @@ -598,11 +610,13 @@ int acked = 0; while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) { + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + /* If our packet is before the ack sequence we can * discard it as it's confirmed to have arrived at * the other end. */ - if (after(TCP_SKB_CB(skb)->end_seq, ack)) + if (after(scb->end_seq, ack)) break; /* Initial outgoing SYN's get put onto the write_queue @@ -612,8 +626,8 @@ * connection startup slow start one packet too * quickly. This is severely frowned upon behavior. 
*/ - if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) { - __u8 sacked = TCP_SKB_CB(skb)->sacked; + if(!(scb->flags & TCPCB_FLAG_SYN)) { + __u8 sacked = scb->sacked; acked |= FLAG_DATA_ACKED; if(sacked & TCPCB_SACKED_RETRANS) { @@ -634,8 +648,8 @@ tp->retrans_head = NULL; } tp->packets_out--; - *seq = TCP_SKB_CB(skb)->seq; - *seq_rtt = now - TCP_SKB_CB(skb)->when; + *seq = scb->seq; + *seq_rtt = now - scb->when; __skb_unlink(skb, skb->list); kfree_skb(skb); } @@ -850,13 +864,12 @@ } /* New-style handling of TIME_WAIT sockets. */ -static void tcp_timewait_kill(unsigned long __arg) -{ - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)__arg; - - /* Zap the timer. */ - del_timer(&tw->timer); +extern void tcp_tw_schedule(struct tcp_tw_bucket *tw); +extern void tcp_tw_reschedule(struct tcp_tw_bucket *tw); +extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); +void tcp_timewait_kill(struct tcp_tw_bucket *tw) +{ /* Unlink from various places. */ if(tw->bind_next) tw->bind_next->bind_pprev = tw->bind_pprev; @@ -908,7 +921,8 @@ isn = tw->rcv_nxt + 128000; if(isn == 0) isn++; - tcp_timewait_kill((unsigned long)tw); + tcp_tw_deschedule(tw); + tcp_timewait_kill(tw); sk = af_specific->get_sock(skb, th); if(sk == NULL || !ipsec_sk_policy(sk,skb)) return 0; @@ -925,16 +939,16 @@ * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ - if(sysctl_tcp_rfc1337 == 0) - tcp_timewait_kill((unsigned long)tw); - + if(sysctl_tcp_rfc1337 == 0) { + tcp_tw_deschedule(tw); + tcp_timewait_kill(tw); + } if(!th->rst) return 1; /* toss a reset back */ } else { - if(th->ack) { - /* In this case we must reset the TIMEWAIT timer. */ - mod_timer(&tw->timer, jiffies + TCP_TIMEWAIT_LEN); - } + /* In this case we must reset the TIMEWAIT timer. */ + if(th->ack) + tcp_tw_reschedule(tw); } return 0; /* Discard the frame. */ } @@ -1010,11 +1024,7 @@ tcp_tw_hashdance(sk, tw); /* Get the TIME_WAIT timeout firing. 
*/ - init_timer(&tw->timer); - tw->timer.function = tcp_timewait_kill; - tw->timer.data = (unsigned long) tw; - tw->timer.expires = jiffies + TCP_TIMEWAIT_LEN; - add_timer(&tw->timer); + tcp_tw_schedule(tw); /* CLOSE the SK. */ if(sk->state == TCP_ESTABLISHED) @@ -1440,6 +1450,20 @@ if (skb->len == 0 && !th->fin) return(0); + /* + * If our receive queue has grown past its limits shrink it. + * Make sure to do this before moving snd_nxt, otherwise + * data might be acked for that we don't have enough room. + */ + if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) { + if (prune_queue(sk) < 0) { + /* Still not enough room. That can happen when + * skb->true_size differs significantly from skb->len. + */ + return 0; + } + } + /* We no longer have anyone receiving data on this connection. */ tcp_data_queue(sk, skb); @@ -1497,7 +1521,7 @@ */ /* Two full frames received or... */ - if (((tp->rcv_nxt - tp->rcv_wup) >= (sk->mss << 1)) || + if (((tp->rcv_nxt - tp->rcv_wup) >= sk->mss * MAX_DELAY_ACK) || /* We will update the window "significantly" or... */ tcp_raise_window(sk) || /* We entered "quick ACK" mode or... */ @@ -1599,7 +1623,7 @@ * Clean first the out_of_order queue, then the receive queue until * the socket is in its memory limits again. 
*/ -static void prune_queue(struct sock *sk) +static int prune_queue(struct sock *sk) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; struct sk_buff * skb; @@ -1613,7 +1637,7 @@ while ((skb = __skb_dequeue_tail(&tp->out_of_order_queue))) { kfree_skb(skb); if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) - return; + return 0; } /* Now continue with the receive queue if it wasn't enough */ @@ -1626,9 +1650,10 @@ /* Never remove packets that have been already acked */ if (before(TCP_SKB_CB(skb)->end_seq, tp->last_ack_sent+1)) { - printk(KERN_DEBUG "prune_queue: hit acked data c=%x,%x,%x\n", - tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->last_ack_sent); - break; + SOCK_DEBUG(sk, "prune_queue: hit acked data c=%x,%x,%x\n", + tp->copied_seq, TCP_SKB_CB(skb)->end_seq, + tp->last_ack_sent); + return -1; } __skb_unlink(skb, skb->list); tp->rcv_nxt = TCP_SKB_CB(skb)->seq; @@ -1639,6 +1664,7 @@ if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) break; } + return 0; } int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, @@ -1763,13 +1789,11 @@ /* step 7: process the segment text */ queued = tcp_data(skb, sk, len); - tcp_data_snd_check(sk); - - /* If our receive queue has grown past its limits shrink it */ - if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) - prune_queue(sk); - - tcp_ack_snd_check(sk); + /* Be careful, tcp_data() may have put this into TIME_WAIT. */ + if(sk->state != TCP_CLOSE) { + tcp_data_snd_check(sk); + tcp_ack_snd_check(sk); + } if (!queued) { discard: @@ -1779,42 +1803,44 @@ return 0; } -/* Shared between IPv4 and IPv6 now. */ -struct sock * -tcp_check_req(struct sock *sk, struct sk_buff *skb, struct open_request *req) +/* + * Process an incoming SYN or SYN-ACK. + */ + +struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, + struct open_request *req) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + u32 flg; /* assumption: the socket is not in use. * as we checked the user count on tcp_rcv and we're * running from a soft interrupt. 
*/ + /* Check for syn retransmission */ + flg = *(((u32 *)skb->h.th) + 3); + + flg &= __constant_htonl(0x00170000); + /* Only SYN set? */ + if (flg == __constant_htonl(0x00020000)) { + if (!after(TCP_SKB_CB(skb)->seq, req->rcv_isn)) { + /* retransmited syn. + */ + req->class->rtx_syn_ack(sk, req); + return NULL; + } else { + return sk; /* Pass new SYN to the listen socket. */ + } + } + + /* We know it's an ACK here */ if (req->sk) { /* socket already created but not * yet accepted()... */ sk = req->sk; } else { - u32 flg; - - /* Check for syn retransmission */ - flg = *(((u32 *)skb->h.th) + 3); - - flg &= __constant_htonl(0x00170000); - /* Only SYN set? */ - if (flg == __constant_htonl(0x00020000)) { - if (!after(TCP_SKB_CB(skb)->seq, req->rcv_isn)) { - /* retransmited syn. - */ - req->class->rtx_syn_ack(sk, req); - return NULL; - } else { - return sk; /* New SYN */ - } - } - - /* We know it's an ACK here */ /* In theory the packet could be for a cookie, but * TIME_WAIT should guard us against this. * XXX: Nevertheless check for cookies? @@ -1901,6 +1927,8 @@ /* We got an ack, but it's not a good ack. */ if(!tcp_ack(sk,th, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->ack_seq, len)) { + sk->err = ECONNRESET; + sk->state_change(sk); tcp_statistics.TcpAttemptFails++; return 1; } @@ -1914,6 +1942,8 @@ /* A valid ack from a different connection * start. Shouldn't happen but cover it. 
*/ + sk->err = ECONNRESET; + sk->state_change(sk); tcp_statistics.TcpAttemptFails++; return 1; } @@ -2112,8 +2142,10 @@ break; case TCP_CLOSING: - if (tp->snd_una == tp->write_seq) + if (tp->snd_una == tp->write_seq) { tcp_time_wait(sk); + goto discard; + } break; case TCP_LAST_ACK: @@ -2155,10 +2187,6 @@ case TCP_ESTABLISHED: queued = tcp_data(skb, sk, len); - - /* This can only happen when MTU+skbheader > rcvbuf */ - if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) - prune_queue(sk); break; } diff -u --recursive --new-file v2.1.98/linux/net/ipv4/tcp_ipv4.c linux/net/ipv4/tcp_ipv4.c --- v2.1.98/linux/net/ipv4/tcp_ipv4.c Tue Apr 14 14:29:26 1998 +++ linux/net/ipv4/tcp_ipv4.c Thu Apr 30 09:37:23 1998 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.133 1998/04/06 08:42:28 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.142 1998/04/30 12:00:45 davem Exp $ * * IPv4 specific functions * @@ -48,8 +48,10 @@ #include #include +#include #include #include +#include #include #include @@ -69,6 +71,10 @@ /* Check TCP sequence numbers in ICMP packets. */ #define ICMP_MIN_LENGTH 8 +/* Socket used for sending RSTs */ +struct inode tcp_inode; +struct socket *tcp_socket=&tcp_inode.u.socket_i; + static void tcp_v4_send_reset(struct sk_buff *skb); void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, @@ -160,6 +166,18 @@ return tb; } +/* Ensure that the bound bucket for the port exists. + * Return 0 on success. + */ +static __inline__ int tcp_bucket_check(unsigned short snum) +{ + if (tcp_bound_hash[tcp_bhashfn(snum)] == NULL && + tcp_bucket_create(snum) == NULL) + return 1; + else + return 0; +} + static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum) { struct tcp_bind_bucket *tb; @@ -850,49 +868,42 @@ static void tcp_v4_send_reset(struct sk_buff *skb) { struct tcphdr *th = skb->h.th; + struct tcphdr rth; + struct ip_reply_arg arg; /* Never send a reset in response to a reset. 
*/ - if (th->rst == 0) { - struct tcphdr *th = skb->h.th; - struct sk_buff *skb1 = ip_reply(skb, sizeof(struct tcphdr)); - struct tcphdr *th1; + if (th->rst) + return; - if (skb1 == NULL) - return; - - skb1->h.th = th1 = (struct tcphdr *) - skb_put(skb1, sizeof(struct tcphdr)); + /* Swap the send and the receive. */ + memset(&rth, 0, sizeof(struct tcphdr)); + rth.dest = th->source; + rth.source = th->dest; + rth.doff = sizeof(struct tcphdr)/4; + rth.rst = 1; - /* Swap the send and the receive. */ - memset(th1, 0, sizeof(*th1)); - th1->dest = th->source; - th1->source = th->dest; - th1->doff = sizeof(*th1)/4; - th1->rst = 1; + if (th->ack) { + rth.seq = th->ack_seq; + } else { + rth.ack = 1; + rth.ack_seq = th->syn ? htonl(ntohl(th->seq)+1) : th->seq; + } - if (th->ack) { - th1->seq = th->ack_seq; - } else { - th1->ack = 1; - if (!th->syn) - th1->ack_seq = th->seq; - else - th1->ack_seq = htonl(ntohl(th->seq)+1); - } - skb1->csum = csum_partial((u8 *) th1, sizeof(*th1), 0); - th1->check = tcp_v4_check(th1, sizeof(*th1), skb1->nh.iph->saddr, - skb1->nh.iph->daddr, skb1->csum); - - /* Finish up some IP bits. */ - skb1->nh.iph->tot_len = htons(skb1->len); - ip_send_check(skb1->nh.iph); + memset(&arg, 0, sizeof arg); + arg.iov[0].iov_base = (unsigned char *)&rth; + arg.iov[0].iov_len = sizeof rth; + arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, + skb->nh.iph->saddr, /*XXX*/ + sizeof(struct tcphdr), + IPPROTO_TCP, + 0); + arg.n_iov = 1; + arg.csumoffset = offsetof(struct tcphdr, check) / sizeof(u16); - /* All the other work was done by ip_reply(). 
*/ - skb1->dst->output(skb1); + ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth); - tcp_statistics.TcpOutSegs++; - tcp_statistics.TcpOutRsts++; - } + tcp_statistics.TcpOutSegs++; + tcp_statistics.TcpOutRsts++; } #ifdef CONFIG_IP_TRANSPARENT_PROXY @@ -1277,12 +1288,19 @@ return NULL; dst = &rt->u.dst; } - - sk->tp_pinfo.af_tcp.syn_backlog--; - sk->ack_backlog++; +#ifdef CONFIG_IP_TRANSPARENT_PROXY + /* The new socket created for transparent proxy may fall + * into a non-existed bind bucket because sk->num != newsk->num. + * Ensure existance of the bucket now. The placement of the check + * later will require to destroy just created newsk in the case of fail. + * 1998/04/22 Andrey V. Savochkin + */ + if (tcp_bucket_check(ntohs(skb->h.th->dest))) + goto exit; +#endif mtu = dst->pmtu; - if (mtu < 68) + if (mtu < 68) /* XXX: we should turn pmtu disc off when this happens. */ mtu = 68; snd_mss = mtu - sizeof(struct iphdr); @@ -1290,6 +1308,9 @@ if (!newsk) goto exit; + sk->tp_pinfo.af_tcp.syn_backlog--; + sk->ack_backlog++; + newsk->dst_cache = dst; newtp = &(newsk->tp_pinfo.af_tcp); @@ -1329,6 +1350,8 @@ (req->sk ? 
sk->ack_backlog : tp->syn_backlog)--; req->class->destructor(req); tcp_openreq_free(req); + + net_statistics.EmbryonicRsts++; } /* Check for embryonic sockets (open_requests) We check packets with @@ -1358,9 +1381,9 @@ sk = tcp_check_req(sk, skb, req); } #ifdef CONFIG_SYN_COOKIES - else { + else { sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); - } + } #endif } return sk; @@ -1454,9 +1477,9 @@ if (tcp_v4_check(th,len,skb->nh.iph->saddr,skb->nh.iph->daddr,skb->csum)) { printk(KERN_DEBUG "TCPv4 bad checksum from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, " "len=%d/%d/%d\n", - NIPQUAD(ntohl(skb->nh.iph->saddr)), + NIPQUAD(skb->nh.iph->saddr), ntohs(th->source), - NIPQUAD(ntohl(skb->nh.iph->daddr)), + NIPQUAD(skb->nh.iph->daddr), ntohs(th->dest), len, skb->len, ntohs(skb->nh.iph->tot_len)); @@ -1712,3 +1735,25 @@ 0, /* inuse */ 0 /* highestinuse */ }; + + + +__initfunc(void tcp_v4_init(struct net_proto_family *ops)) +{ + int err; + + tcp_inode.i_mode = S_IFSOCK; + tcp_inode.i_sock = 1; + tcp_inode.i_uid = 0; + tcp_inode.i_gid = 0; + + tcp_socket->inode = &tcp_inode; + tcp_socket->state = SS_UNCONNECTED; + tcp_socket->type=SOCK_RAW; + + if ((err=ops->create(tcp_socket, IPPROTO_TCP))<0) + panic("Failed to create the TCP control socket.\n"); + tcp_socket->sk->allocation=GFP_ATOMIC; + tcp_socket->sk->num = 256; /* Don't receive any data */ + tcp_socket->sk->ip_ttl = MAXTTL; +} diff -u --recursive --new-file v2.1.98/linux/net/ipv4/tcp_output.c linux/net/ipv4/tcp_output.c --- v2.1.98/linux/net/ipv4/tcp_output.c Tue Apr 14 14:29:26 1998 +++ linux/net/ipv4/tcp_output.c Tue Apr 28 11:10:11 1998 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_output.c,v 1.84 1998/04/06 08:48:29 davem Exp $ + * Version: $Id: tcp_output.c,v 1.87 1998/04/26 01:11:35 davem Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -234,18 +234,14 @@ void tcp_write_xmit(struct sock *sk) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int mss_now = sk->mss; + unsigned int mss_now; /* Account for SACKS, we may need to fragment due to this. * It is just like the real MSS changing on us midstream. * We also handle things correctly when the user adds some * IP options mid-stream. Silly to do, but cover it. */ - if(tp->sack_ok && tp->num_sacks) - mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + - (tp->num_sacks * TCPOLEN_SACK_PERBLOCK)); - if(sk->opt && sk->opt->optlen) - mss_now -= sk->opt->optlen; + mss_now = tcp_current_mss(sk); /* If we are zapped, the bytes will have to remain here. * In time closedown will empty the write queue and all @@ -439,14 +435,14 @@ } /* Do a simple retransmit without using the backoff mechanisms in - * tcp_timer. This is used to speed up path mtu recovery. Note that - * these simple retransmits aren't counted in the usual tcp retransmit - * backoff counters. + * tcp_timer. This is used for path mtu discovery. * The socket is already locked here. */ void tcp_simple_retransmit(struct sock *sk) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb; + unsigned int mss = tcp_current_mss(sk); /* Don't muck with the congestion window here. */ tp->dup_acks = 0; @@ -457,7 +453,10 @@ * and not use it for RTT calculation in the absence of * the timestamp option. */ - tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)); + for (skb = skb_peek(&sk->write_queue); skb != tp->send_head; + skb = skb->next) + if (skb->len > mss) + tcp_retransmit_skb(sk, skb); } static __inline__ void update_retrans_head(struct sock *sk) @@ -477,17 +476,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int current_mss = sk->mss; + unsigned int cur_mss = tcp_current_mss(sk); - /* Account for outgoing SACKS and IP options, if any. 
*/ - if(tp->sack_ok && tp->num_sacks) - current_mss -= (TCPOLEN_SACK_BASE_ALIGNED + - (tp->num_sacks * TCPOLEN_SACK_PERBLOCK)); - if(sk->opt && sk->opt->optlen) - current_mss -= sk->opt->optlen; - - if(skb->len > current_mss) { - if(tcp_fragment(sk, skb, current_mss)) + if(skb->len > cur_mss) { + if(tcp_fragment(sk, skb, cur_mss)) return 1; /* We'll try again later. */ /* New SKB created, account for it. */ @@ -496,11 +488,11 @@ /* Collapse two adjacent packets if worthwhile and we can. */ if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && - (skb->len < (current_mss >> 1)) && + (skb->len < (cur_mss >> 1)) && (skb->next != tp->send_head) && (skb->next != (struct sk_buff *)&sk->write_queue) && (sysctl_tcp_retrans_collapse != 0)) - tcp_retrans_try_collapse(sk, skb, current_mss); + tcp_retrans_try_collapse(sk, skb, cur_mss); if(tp->af_specific->rebuild_header(sk)) return 1; /* Routing failure or similar. */ @@ -602,17 +594,14 @@ { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct sk_buff *skb = skb_peek_tail(&sk->write_queue); - int mss_now = sk->mss; + unsigned int mss_now; /* Optimization, tack on the FIN if we have a queue of * unsent frames. But be careful about outgoing SACKS * and IP options. */ - if(tp->sack_ok && tp->num_sacks) - mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + - (tp->num_sacks * TCPOLEN_SACK_PERBLOCK)); - if(sk->opt && sk->opt->optlen) - mss_now -= sk->opt->optlen; + mss_now = tcp_current_mss(sk); + if((tp->send_head != NULL) && (skb->len < mss_now)) { /* tcp_write_xmit() takes care of the rest. */ TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; @@ -720,6 +709,9 @@ return 0; } +/* + * Prepare a SYN-ACK. 
+ */ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct open_request *req, int mss) { @@ -792,7 +784,7 @@ skb->csum = 0; th->doff = (tcp_header_size >> 2); - tcp_statistics.TcpOutSegs++; + tcp_statistics.TcpOutSegs++; return skb; } diff -u --recursive --new-file v2.1.98/linux/net/ipv4/tcp_timer.c linux/net/ipv4/tcp_timer.c --- v2.1.98/linux/net/ipv4/tcp_timer.c Tue Apr 14 14:29:26 1998 +++ linux/net/ipv4/tcp_timer.c Tue Apr 28 11:10:11 1998 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_timer.c,v 1.48 1998/04/06 08:42:30 davem Exp $ + * Version: $Id: tcp_timer.c,v 1.50 1998/04/14 09:08:59 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -32,6 +32,7 @@ static void tcp_syn_recv_timer(unsigned long); static void tcp_keepalive(unsigned long data); static void tcp_bucketgc(unsigned long); +static void tcp_twkill(unsigned long); struct timer_list tcp_slow_timer = { NULL, NULL, @@ -43,6 +44,7 @@ struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = { {ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},/* SYNACK */ {ATOMIC_INIT(0), TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive}, /* KEEPALIVE */ + {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill}, /* TWKILL */ {ATOMIC_INIT(0), TCP_BUCKETGC_PERIOD, 0, tcp_bucketgc} /* BUCKETGC */ }; @@ -166,11 +168,10 @@ { struct sock *sk = (struct sock*)data; - if(sk->zapped) - return; - - if (sk->tp_pinfo.af_tcp.delayed_acks) - tcp_read_wakeup(sk); + if(!sk->zapped && + sk->tp_pinfo.af_tcp.delayed_acks && + sk->state != TCP_CLOSE) + tcp_send_ack(sk); } void tcp_probe_timer(unsigned long data) @@ -240,9 +241,9 @@ } /* Garbage collect TCP bind buckets. 
*/ -static void tcp_bucketgc(unsigned long __unused) +static void tcp_bucketgc(unsigned long data) { - int i; + int i, reaped = 0;; for(i = 0; i < TCP_BHTABLE_SIZE; i++) { struct tcp_bind_bucket *tb = tcp_bound_hash[i]; @@ -252,8 +253,7 @@ if((tb->owners == NULL) && !(tb->flags & TCPB_FLAG_LOCKED)) { - /* Eat timer reference. */ - tcp_dec_slow_timer(TCP_SLT_BUCKETGC); + reaped++; /* Unlink bucket. */ if(tb->next) @@ -266,6 +266,92 @@ tb = next; } } + if(reaped != 0) { + struct tcp_sl_timer *slt = (struct tcp_sl_timer *)data; + + /* Eat timer references. */ + atomic_sub(reaped, &slt->count); + } +} + +/* Kill off TIME_WAIT sockets once their lifetime has expired. */ +int tcp_tw_death_row_slot = 0; +static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS] = + { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; + +extern void tcp_timewait_kill(struct tcp_tw_bucket *tw); + +static void tcp_twkill(unsigned long data) +{ + struct tcp_tw_bucket *tw; + int killed = 0; + + tw = tcp_tw_death_row[tcp_tw_death_row_slot]; + tcp_tw_death_row[tcp_tw_death_row_slot] = NULL; + while(tw != NULL) { + struct tcp_tw_bucket *next = tw->next_death; + + tcp_timewait_kill(tw); + killed++; + tw = next; + } + if(killed != 0) { + struct tcp_sl_timer *slt = (struct tcp_sl_timer *)data; + atomic_sub(killed, &slt->count); + } + tcp_tw_death_row_slot = + ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1)); +} + +/* These are always called from BH context. See callers in + * tcp_input.c to verify this. + */ +void tcp_tw_schedule(struct tcp_tw_bucket *tw) +{ + int slot = (tcp_tw_death_row_slot - 1) & (TCP_TWKILL_SLOTS - 1); + + tw->death_slot = slot; + tw->next_death = tcp_tw_death_row[slot]; + tcp_tw_death_row[slot] = tw; + tcp_inc_slow_timer(TCP_SLT_TWKILL); +} + +/* Happens rarely if at all, no care about scalability here. 
*/ +void tcp_tw_reschedule(struct tcp_tw_bucket *tw) +{ + struct tcp_tw_bucket *walk; + int slot = tw->death_slot; + + walk = tcp_tw_death_row[slot]; + if(walk == tw) { + tcp_tw_death_row[slot] = tw->next_death; + } else { + while(walk->next_death != tw) + walk = walk->next_death; + walk->next_death = tw->next_death; + } + slot = (tcp_tw_death_row_slot - 1) & (TCP_TWKILL_SLOTS - 1); + tw->death_slot = slot; + tw->next_death = tcp_tw_death_row[slot]; + tcp_tw_death_row[slot] = tw; + /* Timer was incremented when we first entered the table. */ +} + +/* This is for handling early-kills of TIME_WAIT sockets. */ +void tcp_tw_deschedule(struct tcp_tw_bucket *tw) +{ + struct tcp_tw_bucket *walk; + int slot = tw->death_slot; + + walk = tcp_tw_death_row[slot]; + if(walk == tw) { + tcp_tw_death_row[slot] = tw->next_death; + } else { + while(walk->next_death != tw) + walk = walk->next_death; + walk->next_death = tw->next_death; + } + tcp_dec_slow_timer(TCP_SLT_TWKILL); } /* @@ -511,14 +597,14 @@ slt->last = now; trigger = slt->period; } - next = min(next, trigger); - } - } - if (next != ~0UL) { - tcp_slow_timer.expires = now + next; - add_timer(&tcp_slow_timer); + /* Only reschedule if some events remain. 
*/ + if (atomic_read(&slt->count)) + next = min(next, trigger); + } } + if (next != ~0UL) + mod_timer(&tcp_slow_timer, (now + next)); } void __tcp_inc_slow_timer(struct tcp_sl_timer *slt) @@ -531,9 +617,8 @@ when = now + slt->period; if (tcp_slow_timer.prev) { - if ((long)(tcp_slow_timer.expires - when) >= 0) { + if ((long)(tcp_slow_timer.expires - when) >= 0) mod_timer(&tcp_slow_timer, when); - } } else { tcp_slow_timer.expires = when; add_timer(&tcp_slow_timer); diff -u --recursive --new-file v2.1.98/linux/net/ipv6/ip6_fib.c linux/net/ipv6/ip6_fib.c --- v2.1.98/linux/net/ipv6/ip6_fib.c Thu Mar 26 15:57:13 1998 +++ linux/net/ipv6/ip6_fib.c Tue Apr 28 11:10:11 1998 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: ip6_fib.c,v 1.12 1998/03/20 09:12:16 davem Exp $ + * $Id: ip6_fib.c,v 1.13 1998/04/28 06:22:03 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -694,8 +694,13 @@ /* * We can't tidy a case of two children. 
*/ - - if (children > 1 || (fn->fn_flags & RTN_RTINFO)) + if (children > 1) { + if (fn->leaf == NULL) + goto split_repair; + break; + } + + if (fn->fn_flags & RTN_RTINFO) break; /* @@ -765,6 +770,8 @@ stree_node: rt6_release(fn->leaf); + +split_repair: rt = fib6_find_prefix(fn); if (rt == NULL) diff -u --recursive --new-file v2.1.98/linux/net/ipv6/ip6_output.c linux/net/ipv6/ip6_output.c --- v2.1.98/linux/net/ipv6/ip6_output.c Wed Apr 1 20:11:55 1998 +++ linux/net/ipv6/ip6_output.c Tue Apr 28 11:10:11 1998 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: ip6_output.c,v 1.11 1998/03/28 08:29:39 davem Exp $ + * $Id: ip6_output.c,v 1.12 1998/04/11 22:11:06 davem Exp $ * * Based on linux/net/ipv4/ip_output.c * @@ -75,7 +75,6 @@ } else if (dst->neighbour) return dst->neighbour->output(skb); - printk(KERN_DEBUG "khm\n"); kfree_skb(skb); return -EINVAL; } diff -u --recursive --new-file v2.1.98/linux/net/ipv6/proc.c linux/net/ipv6/proc.c --- v2.1.98/linux/net/ipv6/proc.c Thu Mar 26 15:57:13 1998 +++ linux/net/ipv6/proc.c Tue Apr 28 11:10:11 1998 @@ -7,7 +7,7 @@ * PROC file system. This is very similar to the IPv4 version, * except it reports the sockets in the INET6 address family. * - * Version: $Id: proc.c,v 1.7 1998/03/18 07:52:13 davem Exp $ + * Version: $Id: proc.c,v 1.8 1998/04/13 17:06:03 davem Exp $ * * Authors: David S. 
Miller (davem@caip.rutgers.edu) * @@ -71,9 +71,17 @@ destp = ntohs(sp->dport); srcp = ntohs(sp->sport); if((format == 0) && (sp->state == TCP_TIME_WAIT)) { + extern int tcp_tw_death_row_slot; + int slot_dist; + timer_active1 = timer_active2 = 0; timer_active = 3; - timer_expires = tw->timer.expires; + slot_dist = tw->death_slot; + if(slot_dist > tcp_tw_death_row_slot) + slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot; + else + slot_dist = tcp_tw_death_row_slot - slot_dist; + timer_expires = jiffies + (slot_dist * TCP_TWKILL_PERIOD); } else { timer_active1 = del_timer(&tp->retransmit_timer); timer_active2 = del_timer(&sp->timer); diff -u --recursive --new-file v2.1.98/linux/net/ipv6/route.c linux/net/ipv6/route.c --- v2.1.98/linux/net/ipv6/route.c Thu Mar 26 15:57:13 1998 +++ linux/net/ipv6/route.c Tue Apr 28 11:10:11 1998 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: route.c,v 1.27 1998/03/21 07:28:04 davem Exp $ + * $Id: route.c,v 1.28 1998/04/28 06:22:04 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -90,7 +90,11 @@ {{NULL, ATOMIC_INIT(1), ATOMIC_INIT(1), NULL, -1, 0, 0, 0, 0, 0, 0, 0, 0, -ENETUNREACH, NULL, NULL, - ip6_pkt_discard, ip6_pkt_discard, &ip6_dst_ops}}, + ip6_pkt_discard, ip6_pkt_discard, +#ifdef CONFIG_NET_CLS_ROUTE + 0, +#endif + &ip6_dst_ops}}, NULL, {{{0}}}, 256, RTF_REJECT|RTF_NONEXTHOP, ~0U, 255, 0, {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0} }; @@ -751,7 +755,7 @@ goto out; } - grt = rt6_lookup(gw_addr, NULL, dev->ifindex, RTF_LINKRT); + grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, RTF_LINKRT); if (grt == NULL || (grt->rt6i_flags&RTF_GATEWAY)) { *err = -EHOSTUNREACH; diff -u --recursive --new-file v2.1.98/linux/net/ipv6/tcp_ipv6.c linux/net/ipv6/tcp_ipv6.c --- v2.1.98/linux/net/ipv6/tcp_ipv6.c Tue Apr 14 14:29:26 1998 +++ linux/net/ipv6/tcp_ipv6.c Tue Apr 28 11:10:11 1998 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: 
tcp_ipv6.c,v 1.76 1998/04/06 08:42:34 davem Exp $ + * $Id: tcp_ipv6.c,v 1.78 1998/04/16 16:29:22 freitag Exp $ * * Based on: * linux/net/ipv4/tcp.c @@ -48,7 +48,7 @@ static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); -static int tcp_v6_backlog_rcv(struct sock *sk, struct sk_buff *skb); +static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static void tcp_v6_xmit(struct sk_buff *skb); static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, struct ipv6hdr *ip6h, @@ -403,7 +403,7 @@ if (err) { sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific; - sk->backlog_rcv = tcp_v6_backlog_rcv; + sk->backlog_rcv = tcp_v6_do_rcv; } return err; @@ -654,9 +654,6 @@ } -/* FIXME: this is substantially similar to the ipv4 code. - * Can some kind of merge be done? -- erics - */ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req) { struct sk_buff * skb; @@ -1011,128 +1008,161 @@ tcp_synq_unlink(tp, req, prev); req->class->destructor(req); tcp_openreq_free(req); + net_statistics.EmbryonicRsts++; } -int tcp_v6_rcv(struct sk_buff *skb, struct device *dev, - struct in6_addr *saddr, struct in6_addr *daddr, - struct ipv6_options *opt, unsigned short len, - int redo, struct inet6_protocol *protocol) +static inline struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th; - struct sock *sk; + struct tcphdr *th = skb->h.th; + u32 flg = ((u32 *)th)[3]; - /* - * "redo" is 1 if we have already seen this skb but couldn't - * use it at that time (the socket was locked). In that case - * we have already done a lot of the work (looked up the socket - * etc). 
+ /* Check for RST */ + if (flg & __constant_htonl(0x00040000)) { + tcp_v6_rst_req(sk, skb); + return NULL; + } + + /* Check SYN|ACK */ + if (flg & __constant_htonl(0x00120000)) { + struct open_request *req, *dummy; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + req = tcp_v6_search_req(tp, skb->nh.ipv6h,th, &dummy); + if (req) { + sk = tcp_check_req(sk, skb, req); + } +#if 0 /*def CONFIG_SYN_COOKIES */ + else { + sk = cookie_v6_check(sk, skb, (struct ipv6_options *) skb->cb); + } +#endif + } + return sk; +} + +static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + /* Imagine: socket is IPv6. IPv4 packet arrives, + goes to IPv4 receive handler and backlogged. + From backlog it always goes here. Kerboom... + Fortunately, tcp_rcv_established and rcv_established + handle them correctly, but it is not case with + tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK */ - th = skb->h.th; + if (skb->protocol == __constant_htons(ETH_P_IP)) + return tcp_v4_do_rcv(sk, skb); - sk = skb->sk; + /* + * socket locking is here for SMP purposes as backlog rcv + * is currently called with bh processing disabled. + */ - if (!redo) { - if (skb->pkt_type != PACKET_HOST) - goto discard_it; + /* XXX We need to think more about socket locking + * XXX wrt. backlog queues, __release_sock(), etc. -DaveM + */ + lock_sock(sk); - /* - * Pull up the IP header. - */ + /* + * This doesn't check if the socket has enough room for the packet. + * Either process the packet _without_ queueing it and then free it, + * or do the check later. + */ + skb_set_owner_r(skb, sk); - __skb_pull(skb, skb->h.raw - skb->data); + if (sk->state == TCP_ESTABLISHED) { /* Fast path */ + if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) + goto reset; + release_sock(sk); + return 0; + } - /* - * Count it even if it's bad. 
- */ + if (sk->state == TCP_LISTEN) { + struct sock *nsk; + + nsk = tcp_v6_hnd_req(sk, skb); + if (!nsk) + goto discard; + lock_sock(nsk); + release_sock(sk); + sk = nsk; + } - tcp_statistics.TcpInSegs++; + if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->cb, skb->len)) + goto reset; + release_sock(sk); + return 0; - /* - * Try to use the device checksum if provided. - */ +reset: + tcp_v6_send_reset(skb); +discard: + kfree_skb(skb); + release_sock(sk); + return 0; +} - switch (skb->ip_summed) { - case CHECKSUM_NONE: - skb->csum = csum_partial((char *)th, len, 0); - case CHECKSUM_HW: - if (tcp_v6_check(th,len,saddr,daddr,skb->csum)) { - printk(KERN_DEBUG "tcp csum failed\n"); - tcp_statistics.TcpInErrs++; - goto discard_it; - } - default: - /* CHECKSUM_UNNECESSARY */ - }; - - sk = __tcp_v6_lookup(th, saddr, th->source, daddr, th->dest, dev->ifindex); - - if (!sk) { - printk(KERN_DEBUG "socket not found\n"); - goto no_tcp_socket; - } +int tcp_v6_rcv(struct sk_buff *skb, struct device *dev, + struct in6_addr *saddr, struct in6_addr *daddr, + struct ipv6_options *opt, unsigned short len, + int redo, struct inet6_protocol *protocol) +{ + struct tcphdr *th; + struct sock *sk; - TCP_SKB_CB(skb)->seq = ntohl(th->seq); - TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + - len - th->doff*4); - TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - skb->used = 0; - if(sk->state == TCP_TIME_WAIT) - goto do_time_wait; + th = skb->h.th; - skb->sk = sk; - } + if (skb->pkt_type != PACKET_HOST) + goto discard_it; /* - * We may need to add it to the backlog here. + * Pull up the IP header. */ - if (sk->sock_readers) { - __skb_queue_tail(&sk->back_log, skb); - return(0); - } + __skb_pull(skb, skb->h.raw - skb->data); - skb_set_owner_r(skb, sk); + /* + * Count it even if it's bad. 
+ */ - if (sk->state == TCP_ESTABLISHED) { - if (tcp_rcv_established(sk, skb, th, len)) - goto no_tcp_socket; - return 0; - } + tcp_statistics.TcpInSegs++; - if (sk->state == TCP_LISTEN) { - __u32 flg = ((u32 *)th)[3]; + /* + * Try to use the device checksum if provided. + */ - /* Check for RST */ - if (flg & __constant_htonl(0x00040000)) { - tcp_v6_rst_req(sk, skb); - } - - /* Check SYN|ACK */ - if (flg & __constant_htonl(0x00120000)) { - struct open_request *req, *prev; - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - req = tcp_v6_search_req(tp, skb->nh.ipv6h,th,&prev); - if (req) { - sk = tcp_check_req(sk, skb, req); - } - /* else do syncookies (add them here) */ - if (sk == NULL) - goto discard_it; + switch (skb->ip_summed) { + case CHECKSUM_NONE: + skb->csum = csum_partial((char *)th, len, 0); + case CHECKSUM_HW: + if (tcp_v6_check(th,len,saddr,daddr,skb->csum)) { + printk(KERN_DEBUG "tcp csum failed\n"); + tcp_statistics.TcpInErrs++; + goto discard_it; } - } + default: + /* CHECKSUM_UNNECESSARY */ + }; - if (tcp_rcv_state_process(sk, skb, th, opt, len) == 0) - return 0; + sk = __tcp_v6_lookup(th, saddr, th->source, daddr, th->dest, dev->ifindex); -no_tcp_socket: + if (!sk) + goto no_tcp_socket; - /* - * No such TCB. 
If th->rst is 0 send a reset - * (checked in tcp_v6_send_reset) - */ + TCP_SKB_CB(skb)->seq = ntohl(th->seq); + TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + + len - th->doff*4); + TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + skb->used = 0; + if(sk->state == TCP_TIME_WAIT) + goto do_time_wait; + + if (!sk->sock_readers) + return tcp_v6_do_rcv(sk, skb); + + __skb_queue_tail(&sk->back_log, skb); + return(0); +no_tcp_socket: tcp_v6_send_reset(skb); discard_it: @@ -1182,18 +1212,6 @@ return dst->error; } -static int tcp_v6_backlog_rcv(struct sock *sk, struct sk_buff *skb) -{ - int res; - - res = tcp_v6_rcv(skb, skb->dev, - &skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, - (struct ipv6_options *) skb->cb, - skb->len, 1, - (struct inet6_protocol *) sk->pair); - return res; -} - static struct sock * tcp_v6_get_sock(struct sk_buff *skb, struct tcphdr *th) { struct in6_addr *saddr; @@ -1372,7 +1390,7 @@ tcp_v6_sendmsg, /* sendmsg */ tcp_recvmsg, /* recvmsg */ NULL, /* bind */ - tcp_v6_backlog_rcv, /* backlog_rcv */ + tcp_v6_do_rcv, /* backlog_rcv */ tcp_v6_hash, /* hash */ tcp_v6_unhash, /* unhash */ tcp_v6_rehash, /* rehash */ diff -u --recursive --new-file v2.1.98/linux/net/ipx/af_ipx.c linux/net/ipx/af_ipx.c --- v2.1.98/linux/net/ipx/af_ipx.c Tue Apr 14 14:29:26 1998 +++ linux/net/ipx/af_ipx.c Tue Apr 28 11:10:11 1998 @@ -48,6 +48,7 @@ * Revision 0.37: Began adding POSIXisms. * Revision 0.38: Asynchronous socket stuff made current. * Revision 0.39: SPX interfaces + * Revision 0.40: Tiny SIOCGSTAMP fix (chris@cybernet.co.nz) * * Protect the module by a MOD_INC_USE_COUNT/MOD_DEC_USE_COUNT * pair. 
Also, now usage count is managed this way @@ -627,6 +628,14 @@ if (ipx->ipx_source.net != intrfc->if_netnum) { + /* + * Unshare the buffer before modifying the count in + * case its a flood or tcpdump + */ + skb=skb_unshare(skb, GFP_ATOMIC); + if(!skb) + return 0; + ipx = skb->nh.ipxh; if (++(ipx->ipx_tctrl) > ipxcfg_max_hops) send_to_wire = 0; } @@ -725,7 +734,7 @@ } } - if( ipx->ipx_type == IPX_TYPE_PPROP && ipx->ipx_tctrl < 8 && skb->pkt_type == PACKET_HOST ) + if( ipx->ipx_type == IPX_TYPE_PPROP && ipx->ipx_tctrl < 8 && skb->pkt_type != PACKET_OTHERHOST ) { int i; ipx_interface *ifcs; @@ -2169,6 +2178,7 @@ copied); if (err) goto out_free; + sk->stamp=skb->stamp; msg->msg_namelen = sizeof(*sipx); @@ -2429,7 +2439,7 @@ * sockets be closed from user space. */ -__initfunc(static void ipx_proto_finito(void)) +static void ipx_proto_finito(void) { ipx_interface *ifc; diff -u --recursive --new-file v2.1.98/linux/net/netsyms.c linux/net/netsyms.c --- v2.1.98/linux/net/netsyms.c Tue Apr 14 14:29:27 1998 +++ linux/net/netsyms.c Thu Apr 30 09:37:23 1998 @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include @@ -41,6 +42,8 @@ #include #include #include + +extern int tcp_tw_death_row_slot; #endif #endif @@ -66,16 +69,14 @@ extern void destroy_8023_client(struct datalink_proto *); #endif -#ifdef CONFIG_IPV6_MODULE -#ifdef CONFIG_SYSCTL -extern int sysctl_max_syn_backlog; -#endif -#endif - #ifdef CONFIG_ATALK_MODULE #include #endif +#ifdef CONFIG_SYSCTL +extern int sysctl_max_syn_backlog; +#endif + EXPORT_SYMBOL(dev_lockct); /* Skbuff symbols. 
*/ @@ -293,6 +294,8 @@ EXPORT_SYMBOL(tcp_transmit_skb); EXPORT_SYMBOL(tcp_connect); EXPORT_SYMBOL(tcp_make_synack); +EXPORT_SYMBOL(tcp_tw_death_row_slot); +EXPORT_SYMBOL(net_statistics); EXPORT_SYMBOL(xrlim_allow); @@ -320,6 +323,7 @@ #endif #ifdef CONFIG_RTNETLINK +EXPORT_SYMBOL(rtattr_parse); EXPORT_SYMBOL(rtnetlink_links); EXPORT_SYMBOL(__rta_fill); EXPORT_SYMBOL(rtnetlink_dump_ifinfo); @@ -442,9 +446,31 @@ EXPORT_SYMBOL(qdisc_reset); EXPORT_SYMBOL(qdisc_restart); EXPORT_SYMBOL(qdisc_head); +EXPORT_SYMBOL(qdisc_create_dflt); +EXPORT_SYMBOL(noop_qdisc); +#ifdef CONFIG_NET_SCHED +EXPORT_SYMBOL(pfifo_qdisc_ops); EXPORT_SYMBOL(register_qdisc); EXPORT_SYMBOL(unregister_qdisc); -EXPORT_SYMBOL(noop_qdisc); +EXPORT_SYMBOL(qdisc_get_rtab); +EXPORT_SYMBOL(qdisc_put_rtab); +#ifdef CONFIG_NET_ESTIMATOR +EXPORT_SYMBOL(qdisc_new_estimator); +EXPORT_SYMBOL(qdisc_kill_estimator); +#endif +#ifdef CONFIG_NET_POLICE +EXPORT_SYMBOL(tcf_police); +EXPORT_SYMBOL(tcf_police_locate); +EXPORT_SYMBOL(tcf_police_destroy); +#ifdef CONFIG_RTNETLINK +EXPORT_SYMBOL(tcf_police_dump); +#endif +#endif +#endif +#ifdef CONFIG_NET_CLS +EXPORT_SYMBOL(register_tcf_proto_ops); +EXPORT_SYMBOL(unregister_tcf_proto_ops); +#endif EXPORT_SYMBOL(register_gifconf); diff -u --recursive --new-file v2.1.98/linux/net/sched/Config.in linux/net/sched/Config.in --- v2.1.98/linux/net/sched/Config.in Mon Jan 12 15:28:28 1998 +++ linux/net/sched/Config.in Tue Apr 28 11:11:28 1998 @@ -3,9 +3,28 @@ # tristate 'CBQ packet scheduler' CONFIG_NET_SCH_CBQ tristate 'CSZ packet scheduler' CONFIG_NET_SCH_CSZ -#tristate 'HFQ packet scheduler' CONFIG_NET_SCH_HFQ -tristate 'RED queueing discipline' CONFIG_NET_SCH_RED -tristate 'SFQ queueing discipline' CONFIG_NET_SCH_SFQ -tristate 'auxiliary TBF queue' CONFIG_NET_SCH_TBF -tristate 'auxiliary FIFO queue' CONFIG_NET_SCH_PFIFO -tristate 'auxiliary PRIO queue' CONFIG_NET_SCH_PRIO +#tristate 'H-PFQ packet scheduler' CONFIG_NET_SCH_HPFQ +#tristate 'H-FSC packet scheduler' 
CONFIG_NET_SCH_HFCS +tristate 'The simplest PRIO pseudoscheduler' CONFIG_NET_SCH_PRIO +tristate 'RED queue' CONFIG_NET_SCH_RED +tristate 'SFQ queue' CONFIG_NET_SCH_SFQ +tristate 'TEQL queue' CONFIG_NET_SCH_TEQL +tristate 'TBF queue' CONFIG_NET_SCH_TBF +bool 'QoS support' CONFIG_NET_QOS +if [ "$CONFIG_NET_QOS" = "y" ]; then + bool 'Rate estimator' CONFIG_NET_ESTIMATOR +fi +if [ "$CONFIG_IP_MULTIPLE_TABLES" = "y" ]; then + bool 'Packet classifier API' CONFIG_NET_CLS +fi +if [ "$CONFIG_NET_CLS" = "y" ]; then + bool 'Routing tables based classifier' CONFIG_NET_CLS_ROUTE +# bool 'Firewall based classifier' CONFIG_NET_CLS_FW + tristate 'U32 classifier' CONFIG_NET_CLS_U32 + if [ "$CONFIG_NET_QOS" = "y" ]; then + tristate 'Special RSVP classifier' CONFIG_NET_CLS_RSVP + tristate 'Special RSVP classifier for IPv6' CONFIG_NET_CLS_RSVP6 + bool 'Ingres traffic policing' CONFIG_NET_CLS_POLICE + fi +fi + diff -u --recursive --new-file v2.1.98/linux/net/sched/Makefile linux/net/sched/Makefile --- v2.1.98/linux/net/sched/Makefile Sun Nov 30 14:00:40 1997 +++ linux/net/sched/Makefile Tue Apr 28 11:10:11 1998 @@ -11,6 +11,23 @@ O_OBJS := sch_generic.o +ifeq ($(CONFIG_NET_SCHED), y) + +O_OBJS += sch_api.o sch_fifo.o + +ifeq ($(CONFIG_NET_ESTIMATOR), y) +O_OBJS += estimator.o +endif + +ifeq ($(CONFIG_NET_CLS), y) +O_OBJS += cls_api.o + +ifeq ($(CONFIG_NET_CLS_POLICE), y) +O_OBJS += police.o +endif + +endif + ifeq ($(CONFIG_NET_SCH_CBQ), y) O_OBJS += sch_cbq.o else @@ -27,6 +44,23 @@ endif endif +ifeq ($(CONFIG_NET_SCH_HPFQ), y) +O_OBJS += sch_hpfq.o +else + ifeq ($(CONFIG_NET_SCH_HPFQ), m) + M_OBJS += sch_hpfq.o + endif +endif + +ifeq ($(CONFIG_NET_SCH_HFSC), y) +O_OBJS += sch_hfsc.o +else + ifeq ($(CONFIG_NET_SCH_HFSC), m) + M_OBJS += sch_hfsc.o + endif +endif + + ifeq ($(CONFIG_NET_SCH_SFQ), y) O_OBJS += sch_sfq.o else @@ -51,21 +85,54 @@ endif endif +ifeq ($(CONFIG_NET_SCH_PRIO), y) +O_OBJS += sch_prio.o +else + ifeq ($(CONFIG_NET_SCH_PRIO), m) + M_OBJS += sch_prio.o + endif +endif 
-ifeq ($(CONFIG_NET_SCH_PFIFO), y) -O_OBJS += sch_fifo.o +ifeq ($(CONFIG_NET_SCH_TEQL), y) +O_OBJS += sch_teql.o else - ifeq ($(CONFIG_NET_SCH_PFIFO), m) - M_OBJS += sch_fifo.o + ifeq ($(CONFIG_NET_SCH_TEQL), m) + M_OBJS += sch_teql.o endif endif -ifeq ($(CONFIG_NET_SCH_PRIO), y) -O_OBJS += sch_prio.o +ifeq ($(CONFIG_NET_CLS_U32), y) +O_OBJS += cls_u32.o else - ifeq ($(CONFIG_NET_SCH_PRIO), m) - M_OBJS += sch_prio.o + ifeq ($(CONFIG_NET_CLS_U32), m) + M_OBJS += cls_u32.o + endif +endif + +ifeq ($(CONFIG_NET_CLS_RSVP), y) +O_OBJS += cls_rsvp.o +else + ifeq ($(CONFIG_NET_CLS_RSVP), m) + M_OBJS += cls_rsvp.o endif +endif + +ifeq ($(CONFIG_NET_CLS_RSVP6), y) +O_OBJS += cls_rsvp6.o +else + ifeq ($(CONFIG_NET_CLS_RSVP6), m) + M_OBJS += cls_rsvp6.o + endif +endif + +ifeq ($(CONFIG_NET_CLS_ROUTE), y) +O_OBJS += cls_route.o +endif + +ifeq ($(CONFIG_NET_CLS_FW), y) +O_OBJS += cls_fw.o +endif + endif include $(TOPDIR)/Rules.make diff -u --recursive --new-file v2.1.98/linux/net/sched/cls_api.c linux/net/sched/cls_api.c --- v2.1.98/linux/net/sched/cls_api.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/cls_api.c Tue Apr 28 11:10:11 1998 @@ -0,0 +1,432 @@ +/* + * net/sched/cls_api.c Packet classifier API. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* The list of all installed classifier types */ + +static struct tcf_proto_ops *tcf_proto_base; + + +/* Find classifier type by string name */ + +struct tcf_proto_ops * tcf_proto_lookup_ops(struct rtattr *kind) +{ + struct tcf_proto_ops *t; + + if (kind) { + for (t = tcf_proto_base; t; t = t->next) { + if (rtattr_strcmp(kind, t->kind) == 0) + return t; + } + } + return NULL; +} + +/* Register(unregister) new classifier type */ + +int register_tcf_proto_ops(struct tcf_proto_ops *ops) +{ + struct tcf_proto_ops *t, **tp; + + for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next) + if (strcmp(ops->kind, t->kind) == 0) + return -EEXIST; + + ops->next = NULL; + *tp = ops; + return 0; +} + +int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) +{ + struct tcf_proto_ops *t, **tp; + + for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next) + if (t == ops) + break; + + if (!t) + return -ENOENT; + *tp = t->next; + return 0; +} + +#ifdef CONFIG_RTNETLINK + +static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, + struct tcf_proto *tp, unsigned long fh, int event); + + +/* Select new prio value from the range, managed by kernel. 
*/ + +static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp, u32 prio) +{ + u32 first = TC_H_MAKE(0xC0000000U,0U); + + if (!tp || tp->next == NULL) + return first; + + if (prio == TC_H_MAKE(0xFFFF0000U,0U)) + first = tp->prio+1; + else + first = tp->prio-1; + + if (first == prio) + first = tp->prio; + + return first; +} + +/* Add/change/delete/get a filter node */ + +static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +{ + struct rtattr **tca = arg; + struct tcmsg *t = NLMSG_DATA(n); + u32 protocol = TC_H_MIN(t->tcm_info); + u32 prio = TC_H_MAJ(t->tcm_info); + u32 nprio = prio; + struct device *dev; + struct Qdisc *q; + struct tcf_proto **back, **chain; + struct tcf_proto *tp = NULL; + struct tcf_proto_ops *tp_ops; + struct Qdisc_class_ops *cops; + unsigned long cl = 0; + unsigned long fh; + int err; + + if (prio == 0) { + /* If no priority is given, user wants we allocated it. */ + if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) + return -ENOENT; + if (n->nlmsg_flags&NLM_F_APPEND) + prio = TC_H_MAKE(0xFFFF0000U,0U); + else + prio = TC_H_MAKE(0x80000000U,0U); + } + + /* Find head of filter chain. */ + + /* Find link */ + if ((dev = dev_get_by_index(t->tcm_ifindex)) == NULL) + return -ENODEV; + + /* Find qdisc */ + if (!t->tcm_parent) + q = dev->qdisc_sleeping; + else if ((q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent))) == NULL) + return -EINVAL; + + /* Is it classful? */ + if ((cops = q->ops->cl_ops) == NULL) + return -EINVAL; + + /* Do we search for filter, attached to class? 
*/ + if (TC_H_MIN(t->tcm_parent)) { + cl = cops->get(q, t->tcm_parent); + if (cl == 0) + return -ENOENT; + } + + /* And the last stroke */ + chain = cops->tcf_chain(q, cl); + err = -EINVAL; + if (chain == NULL) + goto errout; + + /* Check the chain for existence of proto-tcf with this priority */ + for (back = chain; (tp=*back) != NULL; back = &tp->next) { + if (tp->prio >= prio) { + if (tp->prio == prio) { + if (!nprio || (tp->protocol != protocol && protocol)) + goto errout; + } else + tp = NULL; + break; + } + } + + if (tp == NULL) { + /* Proto-tcf does not exist, create new one */ + + if (tca[TCA_KIND-1] == NULL || !protocol) + goto errout; + + err = -ENOENT; + if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) + goto errout; + + + /* Create new proto tcf */ + + err = -ENOBUFS; + if ((tp = kmalloc(sizeof(*tp), GFP_KERNEL)) == NULL) + goto errout; + tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]); + if (tp_ops == NULL) { + err = -EINVAL; + kfree(tp); + goto errout; + } + memset(tp, 0, sizeof(*tp)); + tp->ops = tp_ops; + tp->protocol = protocol; + tp->prio = nprio ? 
: tcf_auto_prio(*back, prio); + tp->q = q; + tp->classify = tp_ops->classify; + tp->classid = t->tcm_parent; + err = tp_ops->init(tp); + if (err) { + kfree(tp); + goto errout; + } + tp->next = *back; + *back = tp; + } else if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], tp->ops->kind)) + goto errout; + + fh = tp->ops->get(tp, t->tcm_handle); + + if (fh == 0) { + if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { + *back = tp->next; + tp->ops->destroy(tp); + kfree(tp); + err = 0; + goto errout; + } + + err = -ENOENT; + if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) + goto errout; + } else { + switch (n->nlmsg_type) { + case RTM_NEWTFILTER: + err = -EEXIST; + if (n->nlmsg_flags&NLM_F_EXCL) + goto errout; + break; + case RTM_DELTFILTER: + err = tp->ops->delete(tp, fh); + goto errout; + case RTM_GETTFILTER: + err = tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); + goto errout; + default: + err = -EINVAL; + goto errout; + } + } + + err = tp->ops->change(tp, t->tcm_handle, tca, &fh); + if (err == 0) + tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); + +errout: + if (cl) + cops->put(q, cl); + return err; +} + +static int +tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, + u32 pid, u32 seq, unsigned flags, int event) +{ + struct tcmsg *tcm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); + nlh->nlmsg_flags = flags; + tcm = NLMSG_DATA(nlh); + tcm->tcm_family = AF_UNSPEC; + tcm->tcm_ifindex = tp->q->dev->ifindex; + tcm->tcm_parent = tp->classid; + tcm->tcm_handle = 0; + tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); + RTA_PUT(skb, TCA_KIND, IFNAMSIZ, tp->ops->kind); + if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0) + goto rtattr_failure; + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int tfilter_notify(struct sk_buff *oskb, struct 
nlmsghdr *n, + struct tcf_proto *tp, unsigned long fh, int event) +{ + struct sk_buff *skb; + pid_t pid = oskb ? NETLINK_CB(oskb).pid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tcf_fill_node(skb, tp, fh, pid, n->nlmsg_seq, 0, event) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); +} + +struct tcf_dump_args +{ + struct tcf_walker w; + struct sk_buff *skb; + struct netlink_callback *cb; +}; + +static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct tcf_walker *arg) +{ + struct tcf_dump_args *a = (void*)arg; + + return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid, + a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); +} + +static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) +{ + int t; + int s_t; + struct device *dev; + struct Qdisc *q; + struct tcf_proto *tp, **chain; + struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); + unsigned long cl = 0; + struct Qdisc_class_ops *cops; + struct tcf_dump_args arg; + + if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) + return skb->len; + if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + return skb->len; + if ((q = qdisc_lookup(dev, tcm->tcm_parent)) == NULL) + return skb->len; + cops = q->ops->cl_ops; + if (TC_H_MIN(tcm->tcm_parent)) { + if (cops) + cl = cops->get(q, tcm->tcm_parent); + if (cl == 0) + goto errout; + } + chain = cops->tcf_chain(q, cl); + if (chain == NULL) + goto errout; + + s_t = cb->args[0]; + + for (tp=*chain, t=0; tp; tp = tp->next, t++) { + if (t < s_t) continue; + if (TC_H_MAJ(tcm->tcm_info) && + TC_H_MAJ(tcm->tcm_info) != tp->prio) + continue; + if (TC_H_MIN(tcm->tcm_info) && + TC_H_MIN(tcm->tcm_info) != tp->protocol) + continue; + if (t > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int)); + if (cb->args[1] == 0) { + if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, 
RTM_NEWTFILTER) <= 0) { + break; + } + cb->args[1] = 1; + } + if (tp->ops->walk == NULL) + continue; + arg.w.fn = tcf_node_dump; + arg.skb = skb; + arg.cb = cb; + arg.w.stop = 0; + arg.w.skip = cb->args[1]-1; + arg.w.count = 0; + tp->ops->walk(tp, &arg.w); + cb->args[1] = arg.w.count+1; + if (arg.w.stop) + break; + } + + cb->args[0] = t; + +errout: + if (cl) + cops->put(q, cl); + + return skb->len; +} + +#endif + + +__initfunc(int tc_filter_init(void)) +{ +#ifdef CONFIG_RTNETLINK + struct rtnetlink_link *link_p = rtnetlink_links[AF_UNSPEC]; + + /* Setup rtnetlink links. It is made here to avoid + exporting large number of public symbols. + */ + + if (link_p) { + link_p[RTM_NEWTFILTER-RTM_BASE].doit = tc_ctl_tfilter; + link_p[RTM_DELTFILTER-RTM_BASE].doit = tc_ctl_tfilter; + link_p[RTM_GETTFILTER-RTM_BASE].doit = tc_ctl_tfilter; + link_p[RTM_GETTFILTER-RTM_BASE].dumpit = tc_dump_tfilter; + } +#endif +#define INIT_TC_FILTER(name) { \ + extern struct tcf_proto_ops cls_##name##_ops; \ + register_tcf_proto_ops(&cls_##name##_ops); \ + } + +#ifdef CONFIG_NET_CLS_U32 + INIT_TC_FILTER(u32); +#endif +#ifdef CONFIG_NET_CLS_ROUTE + INIT_TC_FILTER(route); +#endif +#ifdef CONFIG_NET_CLS_FW + INIT_TC_FILTER(fw); +#endif +#ifdef CONFIG_NET_CLS_RSVP + INIT_TC_FILTER(rsvp); +#endif +#ifdef CONFIG_NET_CLS_RSVP6 + INIT_TC_FILTER(rsvp6); +#endif + return 0; +} diff -u --recursive --new-file v2.1.98/linux/net/sched/cls_fw.c linux/net/sched/cls_fw.c --- v2.1.98/linux/net/sched/cls_fw.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/cls_fw.c Wed Apr 29 22:46:59 1998 @@ -0,0 +1,96 @@ +/* + * net/sched/cls_fw.c Routing table based packet classifier. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ +#if 0 /* XXX skb->fwmark, where is it? -DaveM */ + u32 clid = skb->fwmark; + + if (clid && (TC_H_MAJ(clid) == 0 || + !(TC_H_MAJ(clid^tp->q->handle)))) { + res->classid = clid; + res->class = 0; + return 0; + } +#endif + return -1; +} + +static unsigned long fw_get(struct tcf_proto *tp, u32 handle) +{ + return 0; +} + +static void fw_put(struct tcf_proto *tp, unsigned long f) +{ +} + +static int fw_init(struct tcf_proto *tp) +{ + return 0; +} + +static void fw_destroy(struct tcf_proto *tp) +{ +} + +static int fw_delete(struct tcf_proto *tp, unsigned long arg) +{ + return -EINVAL; +} + +static int fw_change(struct tcf_proto *tp, u32 handle, + struct rtattr **tca, + unsigned long *arg) +{ + return handle ? -EINVAL : 0; +} + +struct tcf_proto_ops fw_cls_ops = { + NULL, + "fw", + fw_classify, + fw_init, + fw_destroy, + + fw_get, + fw_put, + fw_change, + fw_delete, + NULL, +}; diff -u --recursive --new-file v2.1.98/linux/net/sched/cls_route.c linux/net/sched/cls_route.c --- v2.1.98/linux/net/sched/cls_route.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/cls_route.c Wed Apr 29 22:46:59 1998 @@ -0,0 +1,98 @@ +/* + * net/sched/cls_route.c Routing table based packet classifier. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static int route_classify(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ + struct dst_entry *dst = skb->dst; + + if (dst) { + u32 clid = dst->tclassid; + + if (clid && (TC_H_MAJ(clid) == 0 || + !(TC_H_MAJ(clid^tp->q->handle)))) { + res->classid = clid; + res->class = 0; + return 0; + } + } + return -1; +} + +static unsigned long route_get(struct tcf_proto *tp, u32 handle) +{ + return 0; +} + +static void route_put(struct tcf_proto *tp, unsigned long f) +{ +} + +static int route_init(struct tcf_proto *tp) +{ + return 0; +} + +static void route_destroy(struct tcf_proto *tp) +{ +} + +static int route_delete(struct tcf_proto *tp, unsigned long arg) +{ + return -EINVAL; +} + +static int route_change(struct tcf_proto *tp, u32 handle, + struct rtattr **tca, + unsigned long *arg) +{ + return handle ? -EINVAL : 0; +} + +struct tcf_proto_ops cls_route_ops = { + NULL, + "route", + route_classify, + route_init, + route_destroy, + + route_get, + route_put, + route_change, + route_delete, + NULL, +}; diff -u --recursive --new-file v2.1.98/linux/net/sched/cls_rsvp.c linux/net/sched/cls_rsvp.c --- v2.1.98/linux/net/sched/cls_rsvp.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/cls_rsvp.c Wed Apr 29 22:46:59 1998 @@ -0,0 +1,41 @@ +/* + * net/sched/cls_rsvp.c Special RSVP packet classifier for IPv4. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RSVP_DST_LEN 1 +#define RSVP_ID "rsvp" +#define RSVP_OPS cls_rsvp_ops + +#include "cls_rsvp.h" diff -u --recursive --new-file v2.1.98/linux/net/sched/cls_rsvp.h linux/net/sched/cls_rsvp.h --- v2.1.98/linux/net/sched/cls_rsvp.h Wed Dec 31 16:00:00 1969 +++ linux/net/sched/cls_rsvp.h Wed Apr 29 22:46:59 1998 @@ -0,0 +1,672 @@ +/* + * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + */ + +/* + Comparing to general packet classification problem, + RSVP needs only sevaral relatively simple rules: + + * (dst, protocol) are always specified, + so that we are able to hash them. + * src may be exact, and may be wildcard, so that + we can keep hash table plus one wildcard entry. + * source port (or flow label) is important only if src is given. + + IMPLEMENTATION. + + We use two level hash table: top level is keyed by + destination address and protocol ID, every bucket contains list of + "rsvp sessions", identified by destination address, protocol + and DPI(="Destination Port ID"): triple (key, mask, offset). + + Every bucket has smaller hash table keyed by source address + (cf. RSVP flowspec) and one wildcard entry for wildcard reservations. + Every bucket is again list of "RSVP flows", selected by + source address and SPI(="Source Port ID" here rather than + "security parameter index"): triple (key, mask, offset). + + + NOTE 1. 
All the packets with IPv6 extension headers (except AH and ESP) + and all fragmented packets go to the best-effort traffic class. + + + NOTE 2. Two "port id"s seem to be redundant; rfc2207 requires + only one "Generalized Port Identifier". So for classic + ah, esp (and udp,tcp) both *pi should coincide or one of them + should be wildcard. + + At first sight, this redundancy is just a waste of CPU + resources. But DPI and SPI add the possibility to assign different + priorities to GPIs. See also note 4 about tunnels below. + + + NOTE 3. One complication is the case of tunneled packets. + We implement it as follows: if the first lookup + matches a special session with a non-zero "tunnelhdr" value, + flowid contains not the true flow ID, but a tunnel ID (1...255). + In this case, we pull tunnelhdr bytes and restart the lookup + with the tunnel ID added to the list of keys. Simple and stupid 8)8) + It's enough for PIMREG and IPIP. + + + NOTE 4. Two GPIs make it possible to parse even GRE packets. + E.g. DPI can select ETH_P_IP (and the necessary flags to make + tunnelhdr correct) in the GRE protocol field and SPI matches + the GRE key. Is it not nice? 8)8) + + + Well, as a result, despite its simplicity, we get a pretty + powerful classification engine. 
+ */ + +#include + +struct rsvp_head +{ + u32 tmap[256/32]; + u32 hgenerator; + u8 tgenerator; + struct rsvp_session *ht[256]; +}; + +struct rsvp_session +{ + struct rsvp_session *next; + u32 dst[RSVP_DST_LEN]; + struct tc_rsvp_gpi dpi; + u8 protocol; + u8 tunnelid; + /* 16 (src,sport) hash slots, and one wildcard source slot */ + struct rsvp_filter *ht[16+1]; +}; + + +struct rsvp_filter +{ + struct rsvp_filter *next; + u32 src[RSVP_DST_LEN]; + struct tc_rsvp_gpi spi; + u8 tunnelhdr; + + struct tcf_result res; +#ifdef CONFIG_NET_CLS_POLICE + struct tcf_police *police; +#endif + + u32 handle; + struct rsvp_session *sess; +}; + +static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid) +{ + unsigned h = dst[RSVP_DST_LEN-1]; + h ^= h>>16; + h ^= h>>8; + return (h ^ protocol ^ tunnelid) & 0xFF; +} + +static __inline__ unsigned hash_src(u32 *src) +{ + unsigned h = src[RSVP_DST_LEN-1]; + h ^= h>>16; + h ^= h>>8; + h ^= h>>4; + return h & 0xF; +} + +static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ + struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; + struct rsvp_session *s; + struct rsvp_filter *f; + unsigned h1, h2; + u32 *dst, *src; + u8 protocol; + u8 tunnelid = 0; + u8 *xprt; +#if RSVP_DST_LEN == 4 + struct ipv6hdr *nhptr = skb->nh.ipv6h; +#else + struct iphdr *nhptr = skb->nh.iph; +#endif + +#ifndef __i386__ + if ((unsigned long)nhptr & 3) + return -1; +#endif + +restart: + +#if RSVP_DST_LEN == 4 + src = &nhptr->saddr.s6_addr32[0]; + dst = &nhptr->daddr.s6_addr32[0]; + protocol = nhptr->nexthdr; + xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr); +#else + src = &nhptr->saddr; + dst = &nhptr->daddr; + protocol = nhptr->protocol; + xprt = ((u8*)nhptr) + (nhptr->ihl<<2); + if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET)) + return -1; +#endif + + h1 = hash_dst(dst, protocol, tunnelid); + h2 = hash_src(src); + + for (s = sht[h1]; s; s = s->next) { + if (dst[RSVP_DST_LEN-1] == 
s->dst[RSVP_DST_LEN-1] && + protocol == s->protocol && + !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) +#if RSVP_DST_LEN == 4 + && dst[0] == s->dst[0] + && dst[1] == s->dst[1] + && dst[2] == s->dst[2] +#endif + && tunnelid == s->tunnelid) { + + for (f = s->ht[h2]; f; f = f->next) { + if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] && + !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key)) +#if RSVP_DST_LEN == 4 + && src[0] == f->src[0] + && src[1] == f->src[1] + && src[2] == f->src[2] +#endif + ) { +matched: + if (f->tunnelhdr == 0) { + *res = f->res; +#ifdef CONFIG_NET_CLS_POLICE + if (f->police) + return tcf_police(skb, f->police); +#endif + return 0; + } else { + tunnelid = f->res.classid; + nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr)); + goto restart; + } + } + } + + /* And wildcard bucket... */ + if ((f = s->ht[16]) != NULL) + goto matched; + return -1; + } + } + return -1; +} + +static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) +{ + struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; + struct rsvp_session *s; + struct rsvp_filter *f; + unsigned h1 = handle&0xFF; + unsigned h2 = (handle>>8)&0xFF; + + if (h2 > 16) + return 0; + + for (s = sht[h1]; s; s = s->next) { + for (f = s->ht[h2]; f; f = f->next) { + if (f->handle == handle) + return (unsigned long)f; + } + } + return 0; +} + +static void rsvp_put(struct tcf_proto *tp, unsigned long f) +{ +} + +static int rsvp_init(struct tcf_proto *tp) +{ + struct rsvp_head *data; + + MOD_INC_USE_COUNT; + data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL); + if (data) { + memset(data, 0, sizeof(struct rsvp_head)); + tp->root = data; + return 0; + } + MOD_DEC_USE_COUNT; + return -ENOBUFS; +} + +static void rsvp_destroy(struct tcf_proto *tp) +{ + struct rsvp_head *data = xchg(&tp->root, NULL); + struct rsvp_session **sht; + int h1, h2; + + if (data == NULL) + return; + + sht = data->ht; + + for (h1=0; h1<256; h1++) { + struct rsvp_session *s; + + while ((s = sht[h1]) != 
NULL) { + + sht[h1] = s->next; + + for (h2=0; h2<=16; h2++) { + struct rsvp_filter *f; + + while ((f = s->ht[h2]) != NULL) { + unsigned long cl; + + s->ht[h2] = f->next; + if ((cl = xchg(&f->res.class, 0)) != 0) + tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); +#ifdef CONFIG_NET_CLS_POLICE + tcf_police_release(f->police); +#endif + kfree(f); + } + } + kfree(s); + } + } + kfree(data); + MOD_DEC_USE_COUNT; +} + +static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) +{ + struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg; + unsigned h = f->handle; + struct rsvp_session **sp; + struct rsvp_session *s = f->sess; + int i; + + for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) { + if (*fp == f) { + unsigned long cl; + + *fp = f->next; + if ((cl = xchg(&f->res.class, 0)) != 0) + tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); + +#ifdef CONFIG_NET_CLS_POLICE + tcf_police_release(f->police); +#endif + + kfree(f); + + /* Strip tree */ + + for (i=0; i<=16; i++) + if (s->ht[i]) + return 0; + + /* OK, session has no flows */ + for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF]; + *sp; sp = &(*sp)->next) { + if (*sp == s) { + *sp = s->next; + kfree(s); + return 0; + } + } + + return 0; + } + } + return 0; +} + +static unsigned gen_handle(struct tcf_proto *tp, unsigned salt) +{ + struct rsvp_head *data = tp->root; + int i = 0xFFFF; + + while (i-- > 0) { + u32 h; + if ((data->hgenerator += 0x10000) == 0) + data->hgenerator = 0x10000; + h = data->hgenerator|salt; + if (rsvp_get(tp, h) == 0) + return h; + } + return 0; +} + +static int tunnel_bts(struct rsvp_head *data) +{ + int n = data->tgenerator>>5; + u32 b = 1<<(data->tgenerator&0x1F); + + if (data->tmap[n]&b) + return 0; + data->tmap[n] |= b; + return 1; +} + +static void tunnel_recycle(struct rsvp_head *data) +{ + struct rsvp_session **sht = data->ht; + u32 tmap[256/32]; + int h1, h2; + + memset(tmap, 0, sizeof(tmap)); + + for (h1=0; h1<256; h1++) { + struct rsvp_session *s; + for (s = sht[h1]; s; s = s->next) { + 
for (h2=0; h2<=16; h2++) { + struct rsvp_filter *f; + + for (f = s->ht[h2]; f; f = f->next) { + if (f->tunnelhdr == 0) + continue; + data->tgenerator = f->res.classid; + tunnel_bts(data); + } + } + } + } + + memcpy(data->tmap, tmap, sizeof(tmap)); +} + +static u32 gen_tunnel(struct rsvp_head *data) +{ + int i, k; + + for (k=0; k<2; k++) { + for (i=255; i>0; i--) { + if (++data->tgenerator == 0) + data->tgenerator = 1; + if (tunnel_bts(data)) + return data->tgenerator; + } + tunnel_recycle(data); + } + return 0; +} + +static int rsvp_change(struct tcf_proto *tp, u32 handle, + struct rtattr **tca, + unsigned long *arg) +{ + struct rsvp_head *data = tp->root; + struct rsvp_filter *f, **fp; + struct rsvp_session *s, **sp; + struct tc_rsvp_pinfo *pinfo = NULL; + struct rtattr *opt = tca[TCA_OPTIONS-1]; + struct rtattr *tb[TCA_RSVP_MAX]; + unsigned h1, h2; + u32 *dst; + int err; + + if (opt == NULL) + return -EINVAL; + + if (rtattr_parse(tb, TCA_RSVP_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0) + return -EINVAL; + + if ((f = (struct rsvp_filter*)*arg) != NULL) { + /* Node exists: adjust only classid */ + + if (f->handle != handle && handle) + return -EINVAL; + if (tb[TCA_RSVP_CLASSID-1]) { + unsigned long cl = xchg(&f->res.class, 0); + if (cl) + tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); + f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]); + f->res.class = tp->q->ops->cl_ops->bind_tcf(tp->q, f->res.classid); + } +#ifdef CONFIG_NET_CLS_POLICE + if (tb[TCA_RSVP_POLICE-1]) { + struct tcf_police *police = tcf_police_locate(tb[TCA_RSVP_POLICE-1]); + + tcf_police_release(xchg(&f->police, police)); + } +#endif + return 0; + } + + /* Now more serious part... 
*/ + if (handle) + return -EINVAL; + if (tb[TCA_RSVP_DST-1] == NULL) + return -EINVAL; + + f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL); + if (f == NULL) + return -ENOBUFS; + + memset(f, 0, sizeof(*f)); + h2 = 16; + if (tb[TCA_RSVP_SRC-1]) { + err = -EINVAL; + if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src)) + goto errout; + memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src)); + h2 = hash_src(f->src); + } + if (tb[TCA_RSVP_PINFO-1]) { + err = -EINVAL; + if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo)) + goto errout; + pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]); + f->spi = pinfo->spi; + f->tunnelhdr = pinfo->tunnelhdr; + } + if (tb[TCA_RSVP_CLASSID-1]) { + err = -EINVAL; + if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4) + goto errout; + f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]); + } + + err = -EINVAL; + if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src)) + goto errout; + dst = RTA_DATA(tb[TCA_RSVP_DST-1]); + h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? 
pinfo->tunnelid : 0); + + err = -ENOMEM; + if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0) + goto errout; + + if (f->tunnelhdr) { + err = -EINVAL; + if (f->res.classid > 255) + goto errout; + + err = -ENOMEM; + if (f->res.classid == 0 && + (f->res.classid = gen_tunnel(data)) == 0) + goto errout; + } + + for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) { + if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && + pinfo->protocol == s->protocol && + memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 +#if RSVP_DST_LEN == 4 + && dst[0] == s->dst[0] + && dst[1] == s->dst[1] + && dst[2] == s->dst[2] +#endif + && pinfo->tunnelid == s->tunnelid) { + +insert: + /* OK, we found appropriate session */ + + fp = &s->ht[h2]; + + f->sess = s; + if (f->tunnelhdr == 0) + f->res.class = tp->q->ops->cl_ops->bind_tcf(tp->q, f->res.classid); +#ifdef CONFIG_NET_CLS_POLICE + if (tb[TCA_RSVP_POLICE-1]) + f->police = tcf_police_locate(tb[TCA_RSVP_POLICE-1]); +#endif + + for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next) + if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask) + break; + f->next = *fp; + *fp = f; + return 0; + } + } + + /* No session found. Create new one. 
*/ + + err = -ENOBUFS; + s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL); + if (s == NULL) + goto errout; + memset(s, 0, sizeof(*s)); + memcpy(s->dst, dst, sizeof(*dst)); + s->dpi = pinfo->dpi; + s->protocol = pinfo->protocol; + s->tunnelid = pinfo->tunnelid; + for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) { + if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask) + break; + } + s->next = *sp; + *sp = s; + goto insert; + +errout: + if (f) + kfree(f); + return err; +} + +static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct rsvp_head *head = tp->root; + unsigned h, h1; + + if (arg->stop) + return; + + for (h = 0; h < 256; h++) { + struct rsvp_session *s; + + for (s = head->ht[h]; s; s = s->next) { + for (h1 = 0; h1 <= 16; h1++) { + struct rsvp_filter *f; + + for (f = s->ht[h1]; f; f = f->next) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(tp, (unsigned long)f, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } + } + } + } +} + +#ifdef CONFIG_RTNETLINK +static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct rsvp_head *head = tp->root; + struct rsvp_filter *f = (struct rsvp_filter*)fh; + struct rsvp_session *s; + unsigned char *b = skb->tail; + struct rtattr *rta; + struct tc_rsvp_pinfo pinfo; + + if (f == NULL) + return skb->len; + s = f->sess; + + t->tcm_handle = f->handle; + + + rta = (struct rtattr*)b; + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + + RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst); + pinfo.dpi = s->dpi; + pinfo.spi = f->spi; + pinfo.protocol = s->protocol; + pinfo.tunnelid = s->tunnelid; + pinfo.tunnelhdr = f->tunnelhdr; + RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo); + if (f->res.classid) + RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid); + if (((f->handle>>8)&0xFF) != 16) + RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src); +#ifdef CONFIG_NET_CLS_POLICE + if (f->police) { + struct rtattr * p_rta = (struct 
rtattr*)skb->tail; + + RTA_PUT(skb, TCA_RSVP_POLICE, 0, NULL); + + if (tcf_police_dump(skb, f->police) < 0) + goto rtattr_failure; + + p_rta->rta_len = skb->tail - (u8*)p_rta; + } +#endif + + rta->rta_len = skb->tail - b; + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} +#endif + +struct tcf_proto_ops RSVP_OPS = { + NULL, + RSVP_ID, + rsvp_classify, + rsvp_init, + rsvp_destroy, + + rsvp_get, + rsvp_put, + rsvp_change, + rsvp_delete, + rsvp_walk, +#ifdef CONFIG_RTNETLINK + rsvp_dump +#else + NULL +#endif +}; + +#ifdef MODULE +int init_module(void) +{ + return register_tcf_proto_ops(&RSVP_OPS); +} + +void cleanup_module(void) +{ + unregister_tcf_proto_ops(&RSVP_OPS); +} +#endif diff -u --recursive --new-file v2.1.98/linux/net/sched/cls_rsvp6.c linux/net/sched/cls_rsvp6.c --- v2.1.98/linux/net/sched/cls_rsvp6.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/cls_rsvp6.c Wed Apr 29 22:46:59 1998 @@ -0,0 +1,42 @@ +/* + * net/sched/cls_rsvp6.c Special RSVP packet classifier for IPv6. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RSVP_DST_LEN 4 +#define RSVP_ID "rsvp6" +#define RSVP_OPS cls_rsvp6_ops + +#include "cls_rsvp.h" diff -u --recursive --new-file v2.1.98/linux/net/sched/cls_u32.c linux/net/sched/cls_u32.c --- v2.1.98/linux/net/sched/cls_u32.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/cls_u32.c Tue Apr 28 11:10:11 1998 @@ -0,0 +1,704 @@ +/* + * net/sched/cls_u32.c Ugly (or Universal) 32bit key Packet Classifier. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + * + * The filters are packed into hash tables of key nodes + * with a set of 32bit key/mask pairs at every node. + * Nodes reference next level hash tables etc. + * + * This scheme is the best universal classifier + * I managed to invent; it is not super-fast, but it is not slow + * (provided you programmed it correctly), and general enough. + * And its relative speed grows as the number of rules becomes larger. + * + * It seems to present the best middle point between speed and + * manageability, both by human and by machine. + * + * It is especially useful for link sharing and link sharing, combined + * with QoS; pure RSVP does not need such a general approach and can use + * much simpler (and faster) schemes, such as cls_rsvp.c. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BUG_TRAP(x) if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } + + +struct tc_u_knode +{ + struct tc_u_knode *next; + u32 handle; + struct tc_u_hnode *ht_up; +#ifdef CONFIG_NET_CLS_POLICE + struct tcf_police *police; +#endif + struct tcf_result res; + struct tc_u_hnode *ht_down; + struct tc_u32_sel sel; +}; + +struct tc_u_hnode +{ + struct tc_u_hnode *next; + u32 handle; + struct tc_u_common *tp_c; + int refcnt; + unsigned divisor; + u32 hgenerator; + struct tc_u_knode *ht[1]; +}; + +struct tc_u_common +{ + struct tc_u_common *next; + struct tc_u_hnode *hlist; + struct Qdisc *q; + int refcnt; + u32 hgenerator; +}; + +static struct tc_u_common *u32_list; + +static __inline__ unsigned u32_hash_fold(u32 key, struct tc_u32_sel *sel) +{ + unsigned h = key & sel->hmask; + + h ^= h>>16; + h ^= h>>8; + return h; +} + +static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) +{ + struct { + struct tc_u_knode *knode; + u8 *ptr; + } stack[TC_U32_MAXDEPTH]; + + struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; + u8 *ptr = skb->nh.raw; + struct tc_u_knode *n; + int sdepth = 0; + int off2 = 0; + int sel = 0; + int i; + +#ifndef __i386__ + if ((unsigned long)ptr & 3) + return -1; +#endif + +next_ht: + n = ht->ht[sel]; + +next_knode: + if (n) { + struct tc_u32_key *key = n->sel.keys; + + for (i = n->sel.nkeys; i>0; i--, key++) { + if ((*(u32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) { + n = n->next; + goto next_knode; + } + } + if (n->ht_down == NULL) { +check_terminal: + if (n->sel.flags&TC_U32_TERMINAL) { + *res = n->res; +#ifdef CONFIG_NET_CLS_POLICE + if (n->police) + return tcf_police(skb, n->police); 
+#endif + return 0; + } + n = n->next; + goto next_knode; + } + + /* PUSH */ + if (sdepth >= TC_U32_MAXDEPTH) + goto deadloop; + stack[sdepth].knode = n; + stack[sdepth].ptr = ptr; + sdepth++; + + ht = n->ht_down; + sel = 0; + if (ht->divisor) + sel = ht->divisor&u32_hash_fold(*(u32*)(ptr+n->sel.hoff), &n->sel); + + if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) + goto next_ht; + + if (n->sel.flags&(TC_U32_EAT|TC_U32_VAROFFSET)) { + off2 = n->sel.off + 3; + if (n->sel.flags&TC_U32_VAROFFSET) + off2 += ntohs(n->sel.offmask & *(u16*)(ptr+n->sel.offoff)) >>n->sel.offshift; + off2 &= ~3; + } + if (n->sel.flags&TC_U32_EAT) { + ptr += off2; + off2 = 0; + } + + if (ptr < skb->tail) + goto next_ht; + } + + /* POP */ + if (sdepth--) { + n = stack[sdepth].knode; + ht = n->ht_up; + ptr = stack[sdepth].ptr; + goto check_terminal; + } + return -1; + +deadloop: + if (net_ratelimit()) + printk("cls_u32: dead loop\n"); + return -1; +} + +static __inline__ struct tc_u_hnode * +u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) +{ + struct tc_u_hnode *ht; + + for (ht = tp_c->hlist; ht; ht = ht->next) + if (ht->handle == handle) + break; + + return ht; +} + +static __inline__ struct tc_u_knode * +u32_lookup_key(struct tc_u_hnode *ht, u32 handle) +{ + unsigned sel; + struct tc_u_knode *n; + + sel = TC_U32_HASH(handle); + if (sel > ht->divisor) + return 0; + + for (n = ht->ht[sel]; n; n = n->next) + if (n->handle == handle) + return n; + + return NULL; +} + + +static unsigned long u32_get(struct tcf_proto *tp, u32 handle) +{ + struct tc_u_hnode *ht; + struct tc_u_common *tp_c = tp->data; + + if (TC_U32_HTID(handle) == TC_U32_ROOT) + ht = tp->root; + else + ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle)); + + if (!ht) + return 0; + + if (TC_U32_KEY(handle) == 0) + return (unsigned long)ht; + + return (unsigned long)u32_lookup_key(ht, handle); +} + +static void u32_put(struct tcf_proto *tp, unsigned long f) +{ +} + +static u32 gen_new_htid(struct tc_u_common *tp_c) +{ + 
int i = 0x800; + + do { + if (++tp_c->hgenerator == 0x7FF) + tp_c->hgenerator = 1; + } while (i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); + + return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; +} + +static int u32_init(struct tcf_proto *tp) +{ + struct tc_u_hnode *root_ht; + struct tc_u_common *tp_c; + + MOD_INC_USE_COUNT; + + for (tp_c = u32_list; tp_c; tp_c = tp_c->next) + if (tp_c->q == tp->q) + break; + + root_ht = kmalloc(sizeof(*root_ht), GFP_KERNEL); + if (root_ht == NULL) { + MOD_DEC_USE_COUNT; + return -ENOBUFS; + } + memset(root_ht, 0, sizeof(*root_ht)); + root_ht->divisor = 0; + root_ht->refcnt++; + root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000; + + if (tp_c == NULL) { + tp_c = kmalloc(sizeof(*tp_c), GFP_KERNEL); + if (tp_c == NULL) { + kfree(root_ht); + MOD_DEC_USE_COUNT; + return -ENOBUFS; + } + memset(tp_c, 0, sizeof(*tp_c)); + tp_c->q = tp->q; + tp_c->next = u32_list; + u32_list = tp_c; + } + + tp_c->refcnt++; + root_ht->next = tp_c->hlist; + tp_c->hlist = root_ht; + root_ht->tp_c = tp_c; + + tp->root = root_ht; + tp->data = tp_c; + return 0; +} + +static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n) +{ + unsigned long cl; + + if ((cl = xchg(&n->res.class, 0)) != 0) + tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); +#ifdef CONFIG_NET_CLS_POLICE + tcf_police_release(n->police); +#endif + if (n->ht_down) + n->ht_down->refcnt--; + kfree(n); + return 0; +} + +static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key) +{ + struct tc_u_knode **kp; + struct tc_u_hnode *ht = key->ht_up; + + if (ht) { + for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) { + if (*kp == key) { + *kp = key->next; + u32_destroy_key(tp, key); + return 0; + } + } + } + BUG_TRAP(0); + return 0; +} + +static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) +{ + struct tc_u_knode *n; + unsigned h; + + for (h=0; h<=ht->divisor; h++) { + while ((n = ht->ht[h]) != NULL) { + ht->ht[h] = n->next; + 
u32_destroy_key(tp, n); + } + } +} + +static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) +{ + struct tc_u_common *tp_c = tp->data; + struct tc_u_hnode **hn; + + BUG_TRAP(!ht->refcnt); + + u32_clear_hnode(tp, ht); + + for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) { + if (*hn == ht) { + *hn = ht->next; + kfree(ht); + return 0; + } + } + + BUG_TRAP(0); + return -ENOENT; +} + +static void u32_destroy(struct tcf_proto *tp) +{ + struct tc_u_common *tp_c = tp->data; + struct tc_u_hnode *root_ht = xchg(&tp->root, NULL); + + BUG_TRAP(root_ht != NULL); + + if (root_ht && --root_ht->refcnt == 0) + u32_destroy_hnode(tp, root_ht); + + if (--tp_c->refcnt == 0) { + struct tc_u_hnode *ht; + struct tc_u_common **tp_cp; + + for (tp_cp = &u32_list; *tp_cp; tp_cp = &(*tp_cp)->next) { + if (*tp_cp == tp_c) { + *tp_cp = tp_c->next; + break; + } + } + + for (ht=tp_c->hlist; ht; ht = ht->next) + u32_clear_hnode(tp, ht); + + while ((ht = tp_c->hlist) != NULL) { + tp_c->hlist = ht->next; + + BUG_TRAP(ht->refcnt == 0); + + kfree(ht); + }; + + kfree(tp_c); + } + + tp->data = NULL; +} + +static int u32_delete(struct tcf_proto *tp, unsigned long arg) +{ + struct tc_u_hnode *ht = (struct tc_u_hnode*)arg; + + if (ht == NULL) + return 0; + + if (TC_U32_KEY(ht->handle)) + return u32_delete_key(tp, (struct tc_u_knode*)ht); + + if (tp->root == ht) + return -EINVAL; + + if (--ht->refcnt == 0) + u32_destroy_hnode(tp, ht); + + return 0; +} + +static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) +{ + struct tc_u_knode *n; + unsigned i = 0x7FF; + + for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next) + if (i < TC_U32_NODE(n->handle)) + i = TC_U32_NODE(n->handle); + i++; + + return handle|(i>0xFFF ? 
0xFFF : i); +} + +static int u32_set_parms(struct Qdisc *q, struct tc_u_hnode *ht, + struct tc_u_knode *n, struct rtattr **tb) +{ + if (tb[TCA_U32_LINK-1]) { + u32 handle = *(u32*)RTA_DATA(tb[TCA_U32_LINK-1]); + struct tc_u_hnode *ht_down = NULL; + + if (TC_U32_KEY(handle)) + return -EINVAL; + + if (handle) { + ht_down = u32_lookup_ht(ht->tp_c, handle); + + if (ht_down == NULL) + return -EINVAL; + ht_down->refcnt++; + } + + ht_down = xchg(&n->ht_down, ht_down); + + if (ht_down) + ht_down->refcnt--; + } + if (tb[TCA_U32_CLASSID-1]) { + unsigned long cl = xchg(&n->res.class, 0); + if (cl) + q->ops->cl_ops->unbind_tcf(q, cl); + n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]); + n->res.class = q->ops->cl_ops->bind_tcf(q, n->res.classid); + } +#ifdef CONFIG_NET_CLS_POLICE + if (tb[TCA_U32_POLICE-1]) { + struct tcf_police *police = tcf_police_locate(tb[TCA_U32_POLICE-1]); + + tcf_police_release(xchg(&n->police, police)); + } +#endif + return 0; +} + +static int u32_change(struct tcf_proto *tp, u32 handle, + struct rtattr **tca, + unsigned long *arg) +{ + struct tc_u_common *tp_c = tp->data; + struct tc_u_hnode *ht; + struct tc_u_knode *n; + struct tc_u32_sel *s; + struct rtattr *opt = tca[TCA_OPTIONS-1]; + struct rtattr *tb[TCA_U32_MAX]; + u32 htid; + int err; + + if (opt == NULL) + return handle ? 
-EINVAL : 0; + + if (rtattr_parse(tb, TCA_U32_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0) + return -EINVAL; + + if ((n = (struct tc_u_knode*)*arg) != NULL) { + if (TC_U32_KEY(n->handle) == 0) + return -EINVAL; + + return u32_set_parms(tp->q, n->ht_up, n, tb); + } + + if (tb[TCA_U32_DIVISOR-1]) { + unsigned divisor = *(unsigned*)RTA_DATA(tb[TCA_U32_DIVISOR-1]); + + if (--divisor > 0x100) + return -EINVAL; + if (TC_U32_KEY(handle)) + return -EINVAL; + if (handle == 0) { + handle = gen_new_htid(tp->data); + if (handle == 0) + return -ENOMEM; + } + ht = kmalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL); + if (ht == NULL) + return -ENOBUFS; + memset(ht, 0, sizeof(*ht) + divisor*sizeof(void*)); + ht->tp_c = tp_c; + ht->refcnt = 0; + ht->divisor = divisor; + ht->handle = handle; + ht->next = tp_c->hlist; + tp_c->hlist = ht; + *arg = (unsigned long)ht; + return 0; + } + + if (tb[TCA_U32_HASH-1]) { + htid = *(unsigned*)RTA_DATA(tb[TCA_U32_HASH-1]); + if (TC_U32_HTID(handle) == TC_U32_ROOT) { + ht = tp->root; + htid = ht->handle; + } else { + ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid)); + if (ht == NULL) + return -EINVAL; + } + } else { + ht = tp->root; + htid = ht->handle; + } + + if (ht->divisor < TC_U32_HASH(htid)) + return -EINVAL; + + if (handle) { + if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid)) + return -EINVAL; + if (TC_U32_HASH(handle) && TC_U32_HASH(handle^htid)) + return -EINVAL; + handle = htid | TC_U32_NODE(handle); + } else + handle = gen_new_kid(ht, htid); + + if (tb[TCA_U32_SEL-1] == 0 || + RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel)) + return -EINVAL; + + s = RTA_DATA(tb[TCA_U32_SEL-1]); + n = kmalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); + if (n == NULL) + return -ENOBUFS; + memset(n, 0, sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key)); + memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); + n->ht_up = ht; + n->handle = handle; + err = u32_set_parms(tp->q, ht, n, tb); + if (err == 0) { 
+ struct tc_u_knode **ins; + for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next) + if (TC_U32_NODE(handle) >= TC_U32_NODE((*ins)->handle)) + break; + n->next = *ins; + *ins = n; + *arg = (unsigned long)n; + return 0; + } + kfree(n); + return err; +} + +static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct tc_u_common *tp_c = tp->data; + struct tc_u_hnode *ht; + struct tc_u_knode *n; + unsigned h; + + if (arg->stop) + return; + + for (ht = tp_c->hlist; ht; ht = ht->next) { + if (arg->count >= arg->skip) { + if (arg->fn(tp, (unsigned long)ht, arg) < 0) { + arg->stop = 1; + return; + } + } + arg->count++; + for (h = 0; h <= ht->divisor; h++) { + for (n = ht->ht[h]; n; n = n->next) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(tp, (unsigned long)n, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + } + } +} + +#ifdef CONFIG_RTNETLINK +static int u32_dump(struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct tc_u_knode *n = (struct tc_u_knode*)fh; + unsigned char *b = skb->tail; + struct rtattr *rta; + + if (n == NULL) + return skb->len; + + t->tcm_handle = n->handle; + + rta = (struct rtattr*)b; + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + + if (TC_U32_KEY(n->handle) == 0) { + struct tc_u_hnode *ht = (struct tc_u_hnode*)fh; + u32 divisor = ht->divisor+1; + RTA_PUT(skb, TCA_U32_DIVISOR, 4, &divisor); + } else { + RTA_PUT(skb, TCA_U32_SEL, + sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key), + &n->sel); + if (n->ht_up) { + u32 htid = n->handle & 0xFFFFF000; + RTA_PUT(skb, TCA_U32_HASH, 4, &htid); + } + if (n->res.classid) + RTA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid); + if (n->ht_down) + RTA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle); +#ifdef CONFIG_NET_CLS_POLICE + if (n->police) { + struct rtattr * p_rta = (struct rtattr*)skb->tail; + + RTA_PUT(skb, TCA_U32_POLICE, 0, NULL); + + if (tcf_police_dump(skb, n->police) < 0) + goto 
rtattr_failure; + + p_rta->rta_len = skb->tail - (u8*)p_rta; + } +#endif + } + + rta->rta_len = skb->tail - b; + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} +#endif + +struct tcf_proto_ops cls_u32_ops = { + NULL, + "u32", + u32_classify, + u32_init, + u32_destroy, + + u32_get, + u32_put, + u32_change, + u32_delete, + u32_walk, +#ifdef CONFIG_RTNETLINK + u32_dump +#else + NULL +#endif +}; + +#ifdef MODULE +int init_module(void) +{ + return register_tcf_proto_ops(&cls_u32_ops); +} + +void cleanup_module(void) +{ + unregister_tcf_proto_ops(&cls_u32_ops); +} +#endif diff -u --recursive --new-file v2.1.98/linux/net/sched/estimator.c linux/net/sched/estimator.c --- v2.1.98/linux/net/sched/estimator.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/estimator.c Wed Apr 29 22:46:59 1998 @@ -0,0 +1,183 @@ +/* + * net/sched/estimator.c Simple rate estimator. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + This text is NOT intended to be used for statistics collection, + its purpose is to provide base for statistical multiplexing + for controlled load service. + If you need only statistics, run user level daemon, which will + periodically read byte counters. + + Unfortunately, rate estimation is not very easy task. + F.e. I did not find a simple way to estimate current peak rate + and even failed to formulate the problem 8)8) + + So that I preferred not to built estimator in scheduler, + but run this task separately. 
+ Ideally, it should be kernel thread(s), but for now it runs + from timers, which puts apparent top bounds on number of rated + flows, but has minimal overhead on small, but enough + to handle controlled load service, sets of aggregates. + + We measure rate over A=(1<next) { + u64 nbytes = e->stats->bytes; + u32 npackets = e->stats->packets; + u32 rate; + + rate = (nbytes - e->last_bytes)<<(7 - idx); + e->last_bytes = nbytes; + e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; + e->stats->bps = (e->avbps+0xF)>>5; + + rate = (npackets - e->last_packets)<<(12 - idx); + e->last_packets = npackets; + e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; + e->stats->pps = (e->avpps+0x1FF)>>10; + } + + elist[idx].timer.expires = jiffies + ((HZ/4)<interval < -2 || parm->interval > 3) + return -EINVAL; + + est = kmalloc(sizeof(*est), GFP_KERNEL); + if (est == NULL) + return -ENOBUFS; + + memset(est, 0, sizeof(*est)); + est->interval = parm->interval + 2; + est->stats = stats; + est->ewma_log = parm->ewma_log; + est->last_bytes = stats->bytes; + est->avbps = stats->bps<<5; + est->last_packets = stats->packets; + est->avpps = stats->pps<<10; + + est->next = elist[est->interval].list; + if (est->next == NULL) { + init_timer(&elist[est->interval].timer); + elist[est->interval].timer.data = est->interval; + elist[est->interval].timer.expires = jiffies + ((HZ/4)<interval); + elist[est->interval].timer.function = est_timer; + add_timer(&elist[est->interval].timer); + } + elist[est->interval].list = est; + return 0; +} + +void qdisc_kill_estimator(struct tc_stats *stats) +{ + int idx; + struct qdisc_estimator *est, **pest; + + for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { + int killed = 0; + pest = &elist[idx].list; + while ((est=*pest) != NULL) { + if (est->stats != stats) { + pest = &est->next; + continue; + } + /* ATOMIC_SET */ + *pest = est->next; + kfree(est); + killed++; + } + if (killed && elist[idx].list == NULL) + del_timer(&elist[idx].timer); + } +} + diff 
-u --recursive --new-file v2.1.98/linux/net/sched/police.c linux/net/sched/police.c --- v2.1.98/linux/net/sched/police.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/police.c Tue Apr 28 11:10:11 1998 @@ -0,0 +1,196 @@ +/* + * net/sched/police.c Input police filter. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BUG_TRAP(x) if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } + +#define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log]) +#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log]) + +static u32 idx_gen; +static struct tcf_police *tcf_police_ht[16]; + +static __inline__ unsigned tcf_police_hash(u32 index) +{ + return index&0xF; +} + +static __inline__ struct tcf_police * tcf_police_lookup(u32 index) +{ + struct tcf_police *p; + + for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) { + if (p->index == index) + return p; + } + return NULL; +} + +static __inline__ u32 tcf_police_new_index(void) +{ + do { + if (++idx_gen == 0) + idx_gen = 1; + } while (tcf_police_lookup(idx_gen)); + + return idx_gen; +} + + +void tcf_police_destroy(struct tcf_police *p) +{ + unsigned h = tcf_police_hash(p->index); + struct tcf_police **p1p; + + for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) { + if (*p1p == p) { + *p1p = p->next; + if (p->R_tab) + qdisc_put_rtab(p->R_tab); + if (p->P_tab) + qdisc_put_rtab(p->P_tab); + kfree(p); + return; + } + } + BUG_TRAP(0); +} + +struct tcf_police * tcf_police_locate(struct rtattr 
*rta) +{ + unsigned h; + struct tcf_police *p; + struct rtattr *tb[TCA_POLICE_MAX]; + struct tc_police *parm; + + if (rtattr_parse(tb, TCA_POLICE_MAX, RTA_DATA(rta), RTA_PAYLOAD(rta)) < 0) + return NULL; + + if (tb[TCA_POLICE_TBF-1] == NULL) + return NULL; + + parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); + + if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { + p->refcnt++; + return p; + } + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return NULL; + + memset(p, 0, sizeof(*p)); + p->refcnt = 1; + if ((p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1])) == NULL) + goto failure; + if (parm->peakrate.rate && + (p->P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE-1])) == NULL) + goto failure; + p->toks = p->burst = parm->burst; + p->mtu = parm->mtu; + if (p->mtu == 0) + p->mtu = 255<R_tab->rate.cell_log; + if (p->P_tab) + p->ptoks = L2T_P(p, p->mtu); + PSCHED_GET_TIME(p->t_c); + p->index = parm->index ? : tcf_police_new_index(); + p->action = parm->action; + h = tcf_police_hash(p->index); + p->next = tcf_police_ht[h]; + tcf_police_ht[h] = p; + return p; + +failure: + if (p->R_tab) + qdisc_put_rtab(p->R_tab); + kfree(p); + return NULL; +} + +int tcf_police(struct sk_buff *skb, struct tcf_police *p) +{ + psched_time_t now; + long toks; + long ptoks = 0; + + if (skb->len <= p->mtu) { + PSCHED_GET_TIME(now); + + toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst, 0); + + if (p->P_tab) { + ptoks = toks + p->ptoks; + if (ptoks > (long)L2T_P(p, p->mtu)) + ptoks = (long)L2T_P(p, p->mtu); + ptoks -= L2T_P(p, skb->len); + } + toks += p->toks; + if (toks > (long)p->burst) + toks = p->burst; + toks -= L2T(p, skb->len); + + if ((toks|ptoks) >= 0) { + p->t_c = now; + p->toks = toks; + p->ptoks = ptoks; + return TC_POLICE_OK; + } + } + + return p->action; +} + +#ifdef CONFIG_RTNETLINK +int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) +{ + unsigned char *b = skb->tail; + struct tc_police opt; + + opt.index = p->index; + 
opt.action = p->action; + opt.mtu = p->mtu; + opt.burst = p->burst; + opt.rate = p->R_tab->rate; + if (p->P_tab) + opt.peakrate = p->P_tab->rate; + else + memset(&opt.peakrate, 0, sizeof(opt.peakrate)); + RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} +#endif + diff -u --recursive --new-file v2.1.98/linux/net/sched/sch_api.c linux/net/sched/sch_api.c --- v2.1.98/linux/net/sched/sch_api.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/sch_api.c Tue Apr 28 11:10:11 1998 @@ -0,0 +1,994 @@ +/* + * net/sched/sch_api.c Packet scheduler API. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define BUG_TRAP(x) if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } + +#ifdef CONFIG_RTNETLINK +static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, + struct Qdisc *old, struct Qdisc *new); +static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, + struct Qdisc *q, unsigned long cl, int event); +#endif + +/* + + Short review. + ------------- + + This file consists of two interrelated parts: + + 1. queueing disciplines manager frontend. + 2. traffic classes manager frontend. + + Generally, queueing discipline ("qdisc") is a black box, + which is able to enqueue packets and to dequeue them (when + device is ready to send something) in order and at times + determined by algorithm hidden in it. 
+ + qdisc's are divided into two categories: + - "queues", which have no internal structure visible from outside. + - "schedulers", which split all the packets into "traffic classes", + using "packet classifiers" (look at cls_api.c) + + In turn, classes may have child qdiscs (as a rule, queues) + attached to them etc. etc. etc. + + The goal of the routines in this file is to translate + information supplied by user in the form of handles + to a form more intelligible for the kernel, to make some sanity + checks and part of work, which is common to all qdiscs + and to provide rtnetlink notifications. + + All real intelligent work is done inside qdisc modules. + + + + Every discipline has two major routines: enqueue and dequeue. + + ---dequeue + + dequeue usually returns a skb to send. It is allowed to return NULL, + but it does not mean that queue is empty, it just means that + discipline does not want to send anything this time. + Queue is really empty if q->q.qlen == 0. + For complicated disciplines with multiple queues q->q is not + a real packet queue; however, q->q.qlen must still be valid. + + ---enqueue + + enqueue returns number of enqueued packets i.e. this number is 1, + if packet was enqueued successfully and <1 if something (not + necessarily THIS packet) was dropped. + + Auxiliary routines: + + ---requeue + + requeues a packet that was already dequeued. It is used for non-standard or + just buggy devices, which can defer output even if dev->tbusy=0. + + ---reset + + returns qdisc to initial state: purge all buffers, clear all + timers, counters (except for statistics) etc. + + ---init + + initializes newly created qdisc. + + ---destroy + + destroys resources allocated by init and during lifetime of qdisc. + */ + +/************************************************ + * Queueing disciplines manipulation. * + ************************************************/ + + +/* The list of all installed queueing disciplines.
*/ + +static struct Qdisc_ops *qdisc_base = NULL; + +/* Register/unregister queueing discipline */ + +int register_qdisc(struct Qdisc_ops *qops) +{ + struct Qdisc_ops *q, **qp; + + for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) + if (strcmp(qops->id, q->id) == 0) + return -EEXIST; + + if (qops->enqueue == NULL) + qops->enqueue = noop_qdisc_ops.enqueue; + if (qops->requeue == NULL) + qops->requeue = noop_qdisc_ops.requeue; + if (qops->dequeue == NULL) + qops->dequeue = noop_qdisc_ops.dequeue; + + qops->next = NULL; + *qp = qops; + return 0; +} + +int unregister_qdisc(struct Qdisc_ops *qops) +{ + struct Qdisc_ops *q, **qp; + for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) + if (q == qops) + break; + if (!q) + return -ENOENT; + *qp = q->next; + q->next = NULL; + return 0; +} + +/* We know handle. Find qdisc among all qdisc's attached to device + (root qdisc, all its children, children of children etc.) + */ + +struct Qdisc *qdisc_lookup(struct device *dev, u32 handle) +{ + struct Qdisc *q; + + for (q = dev->qdisc_list; q; q = q->next) { + if (q->handle == handle) + return q; + } + return NULL; +} + +/* We know classid. Find qdisc among all qdisc's attached to device + (root qdisc, all its children, children of children etc.)
+ */ + +struct Qdisc *qdisc_lookup_class(struct device *dev, u32 classid) +{ + struct Qdisc *q; + + for (q = dev->qdisc_list; q; q = q->next) { + if (q->classid == classid) + return q; + } + return NULL; +} + + +/* Find queueing discipline by name */ + +struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind) +{ + struct Qdisc_ops *q; + + if (kind) { + for (q = qdisc_base; q; q = q->next) { + if (rtattr_strcmp(kind, q->id) == 0) + return q; + } + } + return NULL; +} + +static struct qdisc_rate_table *qdisc_rtab_list; + +struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab) +{ + struct qdisc_rate_table *rtab; + + for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) { + if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) { + rtab->refcnt++; + return rtab; + } + } + + if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024) + return NULL; + + rtab = kmalloc(sizeof(*rtab), GFP_KERNEL); + if (rtab) { + rtab->rate = *r; + rtab->refcnt = 1; + memcpy(rtab->data, RTA_DATA(tab), 1024); + rtab->next = qdisc_rtab_list; + qdisc_rtab_list = rtab; + } + return rtab; +} + +void qdisc_put_rtab(struct qdisc_rate_table *tab) +{ + struct qdisc_rate_table *rtab, **rtabp; + + if (!tab || --tab->refcnt) + return; + + for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) { + if (rtab == tab) { + *rtabp = rtab->next; + kfree(rtab); + return; + } + } +} + + +/* Allocate an unique handle from space managed by kernel */ + +u32 qdisc_alloc_handle(struct device *dev) +{ + int i = 0x10000; + static u32 autohandle = TC_H_MAKE(0x80000000U, 0); + + do { + autohandle += TC_H_MAKE(0x10000U, 0); + if (autohandle == TC_H_MAKE(TC_H_ROOT, 0)) + autohandle = TC_H_MAKE(0x80000000U, 0); + } while (qdisc_lookup(dev, autohandle) && --i > 0); + + return i>0 ? autohandle : 0; +} + +/* Graft qdisc "new" to class "classid" of qdisc "parent" or + to device "dev". + + Old qdisc is not destroyed but returned in *old. 
+ */ + +int qdisc_graft(struct device *dev, struct Qdisc *parent, u32 classid, + struct Qdisc *new, struct Qdisc **old) +{ + int err = 0; + + if (parent == NULL) { + BUG_TRAP(classid == TC_H_ROOT); + if (new) { + new->parent = NULL; + new->classid = TC_H_ROOT; + } + *old = dev_set_scheduler(dev, new); + } else { + struct Qdisc_class_ops *cops = parent->ops->cl_ops; + + BUG_TRAP(classid != TC_H_ROOT); + + err = -EINVAL; + + if (cops) { + unsigned long cl = cops->get(parent, classid); + if (cl) { + err = cops->graft(parent, cl, new, old); + cops->put(parent, cl); + } + } + } + return err; +} + +#ifdef CONFIG_RTNETLINK + +/* + Allocate and initialize new qdisc. + + Parameters are passed via opt. + */ + +static struct Qdisc * +qdisc_create(struct device *dev, struct Qdisc_ops *ops, u32 handle, + u32 parentid, struct rtattr **tca, int *errp) +{ + int err; + struct rtattr *kind = tca[TCA_KIND-1]; + struct Qdisc *sch = NULL; + int size; + int new = 0; + + if (ops == NULL) { + ops = qdisc_lookup_ops(kind); + err = -EINVAL; + if (ops == NULL) + goto err_out; + new = 1; + } + + size = sizeof(*sch) + ops->priv_size; + + sch = kmalloc(size, GFP_KERNEL); + err = -ENOBUFS; + if (!sch) + goto err_out; + + /* Grrr... 
Resolve race condition with module unload */ + + err = -EINVAL; + if (new) { + if (ops != qdisc_lookup_ops(kind)) + goto err_out; + } else if (kind) { + if (rtattr_strcmp(kind, ops->id)) + goto err_out; + } + + memset(sch, 0, size); + + skb_queue_head_init(&sch->q); + sch->ops = ops; + sch->enqueue = ops->enqueue; + sch->dequeue = ops->dequeue; + sch->dev = dev; + if (handle == 0) { + handle = qdisc_alloc_handle(dev); + err = -ENOMEM; + if (handle == 0) + goto err_out; + } + sch->handle = handle; + sch->classid = parentid; + + if (ops->init && (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { + sch->next = dev->qdisc_list; + dev->qdisc_list = sch; +#ifdef CONFIG_NET_ESTIMATOR + if (tca[TCA_RATE-1]) + qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]); +#endif + return sch; + } + +err_out: + *errp = err; + if (sch) + kfree(sch); + return NULL; +} + + +/* + Create/delete/change/get qdisc. + */ + +static int tc_ctl_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +{ + struct tcmsg *tcm = NLMSG_DATA(n); + struct rtattr **tca = arg; + struct device *dev; + u32 clid = tcm->tcm_parent; + struct Qdisc *old_q; + struct Qdisc *q = NULL; + struct Qdisc *p = NULL; + struct Qdisc *leaf = NULL; + struct Qdisc_ops *qops = NULL; + int err; + + /* Find device */ + if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + return -ENODEV; + + /* If parent is specified, it must exist + and tcm_parent selects a class in parent which + new qdisc will be attached to. + + The place may be already busy by another qdisc, + remember this fact, if it was not auto-created discipline. + */ + if (clid) { + if (clid != TC_H_ROOT) { + p = qdisc_lookup(dev, TC_H_MAJ(clid)); + if (p == NULL) + return -ENOENT; + leaf = qdisc_lookup_class(dev, clid); + } else + leaf = dev->qdisc_sleeping; + + if (leaf && leaf->flags&TCQ_F_DEFAULT && n->nlmsg_type == RTM_NEWQDISC) + leaf = NULL; + + /* + Also, leaf may be exactly that qdisc, which we want + to control. 
Remember this to avoid one more qdisc_lookup. + */ + + if (leaf && leaf->handle == tcm->tcm_handle) + q = leaf; + } + + /* Try to locate the discipline */ + if (tcm->tcm_handle && q == NULL) { + if (TC_H_MIN(tcm->tcm_handle)) + return -EINVAL; + q = qdisc_lookup(dev, tcm->tcm_handle); + } + + /* If discipline already exists, check that its real parent + matches to one selected by tcm_parent. + */ + + if (q) { + if (clid && p != q->parent) + return -EINVAL; + BUG_TRAP(!leaf || leaf == q); + if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) + return -EINVAL; + clid = q->classid; + goto process_existing; + } + + /* The discipline is known not to exist. + If parent was not selected too, return error. + */ + if (clid == 0) + return tcm->tcm_handle ? -ENOENT : -EINVAL; + + /* Check for the case when leaf is exactly the thing, + that you want. + */ + + if (leaf && tcm->tcm_handle == 0) { + q = leaf; + if (!tca[TCA_KIND-1] || rtattr_strcmp(tca[TCA_KIND-1], q->ops->id) == 0) + goto process_existing; + } + + if (n->nlmsg_type != RTM_NEWQDISC || !(n->nlmsg_flags&NLM_F_CREATE)) + return -ENOENT; + if (leaf && n->nlmsg_flags&NLM_F_EXCL) + return -EEXIST; + +create_and_graft: + q = qdisc_create(dev, qops, tcm->tcm_handle, clid, tca, &err); + if (q == NULL) + return err; + +graft: + err = qdisc_graft(dev, p, clid, q, &old_q); + if (err) { + if (q) + qdisc_destroy(q); + return err; + } + qdisc_notify(skb, n, old_q, q); + if (old_q) + qdisc_destroy(old_q); + return 0; + +process_existing: + + switch (n->nlmsg_type) { + case RTM_NEWQDISC: + if (n->nlmsg_flags&NLM_F_EXCL) + return -EEXIST; + qops = q->ops; + goto create_and_graft; + case RTM_GETQDISC: + qdisc_notify(skb, n, NULL, q); + return 0; + case RTM_DELQDISC: + q = NULL; + goto graft; + default: + return -EINVAL; + } +} + +static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, + pid_t pid, u32 seq, unsigned flags, int event) +{ + struct tcmsg *tcm; + struct nlmsghdr *nlh; + unsigned char *b = 
skb->tail; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); + nlh->nlmsg_flags = flags; + tcm = NLMSG_DATA(nlh); + tcm->tcm_family = AF_UNSPEC; + tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0; + tcm->tcm_parent = q->classid; + tcm->tcm_handle = q->handle; + tcm->tcm_info = 0; + RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id); + if (q->ops->dump && q->ops->dump(q, skb) < 0) + goto rtattr_failure; + q->stats.qlen = q->q.qlen; + RTA_PUT(skb, TCA_STATS, sizeof(q->stats), &q->stats); + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, + struct Qdisc *old, struct Qdisc *new) +{ + struct sk_buff *skb; + pid_t pid = oskb ? NETLINK_CB(oskb).pid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (old && !(old->flags&TCQ_F_DEFAULT)) { + if (tc_fill_qdisc(skb, old, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) + goto err_out; + } + if (new) { + if (tc_fill_qdisc(skb, new, pid, n->nlmsg_seq, old ? 
NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) + goto err_out; + } + + if (skb->len) + return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + +err_out: + kfree_skb(skb); + return -EINVAL; +} + +static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx, q_idx; + int s_idx, s_q_idx; + struct device *dev; + struct Qdisc *q; + + s_idx = cb->args[0]; + s_q_idx = q_idx = cb->args[1]; + for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + if (idx < s_idx) + continue; + if (idx > s_idx) + s_q_idx = 0; + for (q = dev->qdisc_list, q_idx = 0; q; + q = q->next, q_idx++) { + if (q_idx < s_q_idx) + continue; + if (tc_fill_qdisc(skb, q, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + } + } + +done: + cb->args[0] = idx; + cb->args[1] = q_idx; + + return skb->len; +} + + + +/************************************************ + * Traffic classes manipulation. * + ************************************************/ + + + +static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +{ + struct tcmsg *tcm = NLMSG_DATA(n); + struct rtattr **tca = arg; + struct device *dev; + struct Qdisc *q = NULL; + struct Qdisc_class_ops *cops; + unsigned long cl = 0; + unsigned long new_cl; + u32 pid = tcm->tcm_parent; + u32 clid = tcm->tcm_handle; + u32 qid = TC_H_MAJ(clid); + int err; + + if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + return -ENODEV; + + /* + parent == TC_H_UNSPEC - unspecified parent. + parent == TC_H_ROOT - class is root, which has no parent. + parent == X:0 - parent is root class. + parent == X:Y - parent is a node in hierarchy. + parent == 0:Y - parent is X:Y, where X:0 is qdisc. + + handle == 0:0 - generate handle from kernel pool. + handle == 0:Y - class is X:Y, where X:0 is qdisc. + handle == X:Y - clear. + handle == X:0 - root class. + */ + + /* Step 1. 
Determine qdisc handle X:0 */ + + if (pid != TC_H_ROOT) { + u32 qid1 = TC_H_MAJ(pid); + + if (qid && qid1) { + /* If both majors are known, they must be identical. */ + if (qid != qid1) + return -EINVAL; + } else if (qid1) { + qid = qid1; + } else if (qid == 0) + qid = dev->qdisc_sleeping->handle; + + /* Now qid is genuine qdisc handle consistent + both with parent and child. + + TC_H_MAJ(pid) still may be unspecified, complete it now. + */ + if (pid) + pid = TC_H_MAKE(qid, pid); + } else { + if (qid == 0) + qid = dev->qdisc_sleeping->handle; + } + + /* OK. Locate qdisc */ + if ((q = qdisc_lookup(dev, qid)) == NULL) + return -ENOENT; + + /* An check that it supports classes */ + cops = q->ops->cl_ops; + if (cops == NULL) + return -EINVAL; + + /* Now try to get class */ + if (clid == 0) { + if (pid == TC_H_ROOT) + clid = qid; + } else + clid = TC_H_MAKE(qid, clid); + + if (clid) + cl = cops->get(q, clid); + + if (cl == 0) { + err = -ENOENT; + if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE)) + goto out; + } else { + switch (n->nlmsg_type) { + case RTM_NEWTCLASS: + err = -EEXIST; + if (n->nlmsg_flags&NLM_F_EXCL) + goto out; + break; + case RTM_DELTCLASS: + err = cops->delete(q, cl); + if (err == 0) + tclass_notify(skb, n, q, cl, RTM_DELTCLASS); + goto out; + case RTM_GETTCLASS: + err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS); + goto out; + default: + err = -EINVAL; + goto out; + } + } + + new_cl = cl; + err = cops->change(q, clid, pid, tca, &new_cl); + if (err == 0) + tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS); + +out: + if (cl) + cops->put(q, cl); + + return err; +} + + +static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, + unsigned long cl, + pid_t pid, u32 seq, unsigned flags, int event) +{ + struct tcmsg *tcm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); + nlh->nlmsg_flags = flags; + tcm = NLMSG_DATA(nlh); + tcm->tcm_family = AF_UNSPEC; + tcm->tcm_ifindex 
= q->dev ? q->dev->ifindex : 0; + tcm->tcm_parent = q->handle; + tcm->tcm_handle = q->handle; + tcm->tcm_info = 0; + RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id); + if (q->ops->cl_ops->dump && q->ops->cl_ops->dump(q, cl, skb, tcm) < 0) + goto rtattr_failure; + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, + struct Qdisc *q, unsigned long cl, int event) +{ + struct sk_buff *skb; + pid_t pid = oskb ? NETLINK_CB(oskb).pid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) { + kfree_skb(skb); + return -EINVAL; + } + + return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); +} + +struct qdisc_dump_args +{ + struct qdisc_walker w; + struct sk_buff *skb; + struct netlink_callback *cb; +}; + +static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) +{ + struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; + + return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid, + a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); +} + +static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) +{ + int t; + int s_t; + struct device *dev; + struct Qdisc *q; + struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); + struct qdisc_dump_args arg; + + if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) + return 0; + if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + return 0; + + s_t = cb->args[0]; + + for (q=dev->qdisc_list, t=0; q; q = q->next, t++) { + if (t < s_t) continue; + if (!q->ops->cl_ops) continue; + if (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle + && (tcm->tcm_parent != TC_H_ROOT || q->parent != NULL)) + continue; + if (t > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int)); + arg.w.fn = qdisc_class_dump; + 
arg.skb = skb; + arg.cb = cb; + arg.w.stop = 0; + arg.w.skip = cb->args[1]; + arg.w.count = 0; + q->ops->cl_ops->walk(q, &arg.w); + cb->args[1] = arg.w.count; + if (arg.w.stop) + break; + } + + cb->args[0] = t; + + return skb->len; +} +#endif + +int psched_us_per_tick = 1; +int psched_tick_per_us = 1; + +#ifdef CONFIG_PROC_FS +static int psched_read_proc(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + int len; + + len = sprintf(buffer, "%08x %08x\n", + psched_tick_per_us, psched_us_per_tick); + + len -= offset; + + if (len > length) + len = length; + if(len < 0) + len = 0; + + *start = buffer + offset; + *eof = 1; + + return len; +} +#endif + +psched_time_t psched_time_base; + +#if PSCHED_CLOCK_SOURCE == PSCHED_CPU +psched_tdiff_t psched_clock_per_hz; +int psched_clock_scale; +#endif + +#ifdef PSCHED_WATCHER +u32 psched_time_mark; + +static void psched_tick(unsigned long); + +static struct timer_list psched_timer = + { NULL, NULL, 0, 0L, psched_tick }; + +static void psched_tick(unsigned long dummy) +{ +#if PSCHED_CLOCK_SOURCE == PSCHED_CPU + psched_time_t dummy_stamp; + PSCHED_GET_TIME(dummy_stamp); + psched_timer.expires = jiffies + 4*HZ; +#else + unsigned long jiffies = now; + psched_time_base = ((u64)now)< delay) + return -1; + delay /= rdelay; + psched_tick_per_us = delay; + while ((delay>>=1) != 0) + psched_clock_scale++; + psched_us_per_tick = 1<>psched_clock_scale; + return 0; +} +#endif + +__initfunc(int pktsched_init(void)) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *ent; +#endif + +#if PSCHED_CLOCK_SOURCE == PSCHED_CPU + if (psched_calibrate_clock() < 0) + return -1; +#elif PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES + psched_tick_per_us = HZ<read_proc = psched_read_proc; +#endif + + return 0; +} diff -u --recursive --new-file v2.1.98/linux/net/sched/sch_cbq.c linux/net/sched/sch_cbq.c --- v2.1.98/linux/net/sched/sch_cbq.c Sun Nov 30 14:00:40 1997 +++ linux/net/sched/sch_cbq.c Tue Apr 28 11:10:11 1998 @@ -10,6 +10,8 @@ 
* */ +#include +#include #include #include #include @@ -47,222 +49,279 @@ [3] Sally Floyd, "Notes on Class-Based Queueing: Setting Parameters", 1996 - Algorithm skeleton is taken from from NS simulator cbq.cc. + [4] Sally Floyd and Michael Speer, "Experimental Results + for Class-Based Queueing", 1998, not published. ----------------------------------------------------------------------- - Differences from NS version. - - --- WRR algorith is different. Our version looks more reasonable :-) - and fair when quanta are allowed to be less than MTU. - - --- cl->aveidle is REALLY limited from below by cl->minidle. - Seems, it was bug in NS. - - --- Purely lexical change: "depth" -> "level", "maxdepth" -> "toplevel". - When depth increases we expect, that the thing becomes lower, does not it? :-) - Besides that, "depth" word is semantically overloaded --- - "token bucket depth", "sfq depth"... Besides that, the algorithm - was called "top-LEVEL sharing". - - PROBLEM. - - --- Linux has no EOI event at the moment, so that we cannot - estimate true class idle time. Three workarounds are possible, - all of them have drawbacks: - - 1. (as now) Consider the next dequeue event as sign that - previous packet is finished. It is wrong because of ping-pong - buffers, but on permanently loaded link it is true. - 2. (NS approach) Use as link busy time estimate skb->leb/"physical - bandwidth". Even more wrong f.e. on ethernet real busy time much - higher because of collisions. - 3. (seems, the most clever) Split net bh to two parts: - NETRX_BH (for received packets) and preserve NET_BH for transmitter. - It will not require driver changes (NETRX_BH flag will be set - in netif_rx), but will allow to trace EOIs more precisely - and will save useless checks in net_bh. Besides that we will - have to eliminate random calling hard_start_xmit with dev->tbusy flag - (done) and to drop failure_q --- i.e. if !dev->tbusy hard_start_xmit - MUST succeed; failed packets will be dropped on the floor. 
+ Algorithm skeleton is taken from NS simulator cbq.cc. + If someone wants to check this text against LBL version, + he should take into account that ONLY skeleton is borrowed, + implementation is different. Particularly: + + --- WRR algorithm is different. Our version looks + more reasonable (I hope) and works when quanta are allowed + to be less than MTU, which always is the case, when real time + classes have small rates. Note, that the statement of [3] is incomplete, + Actually delay may be estimated even if class per-round allotment + is less than MTU. Namely, if per-round allotment is W*r_i, + and r_1+...+r_k = r < 1 + + delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B + + In the worst case we have IntServ estimate with D = W*r+k*MTU + and C = MTU*r. The proof (if correct at all) is trivial. + + + --- Seems, cbq-2.0 is not very accurate. At least, I cannot + interpret some places, which look like wrong translation + from NS. Anyone is invited to find these differences + and explain to me why I am wrong 8). + + --- Linux has no EOI event, so that we cannot estimate true class + idle time. Workaround is to consider the next dequeue event + as sign that previous packet is finished. It is wrong because of + internal device queueing, but on permanently loaded link it is true. + Moreover, combined with clock integrator, this scheme looks + very close to ideal solution. */ -#define CBQ_TOPLEVEL_SHARING -/* #define CBQ_NO_TRICKERY */ +struct cbq_sched_data; -#define CBQ_CLASSIFIER(skb, q) ((q)->fallback_class) struct cbq_class { + struct cbq_class *next; /* hash table link */ + struct cbq_class *next_alive; /* next class with backlog in this priority band */ + /* Parameters */ - int priority; /* priority */ -#ifdef CBQ_TOPLEVEL_SHARING - int level; /* level of the class in hierarchy: - 0 for leaf classes, and maximal - level of childrens + 1 for nodes.
- */ + u32 classid; + unsigned char priority; /* class priority */ + unsigned char priority2; /* priority to be used after overlimit */ + unsigned char ewma_log; /* time constant for idle time calculation */ + unsigned char ovl_strategy; +#ifdef CONFIG_NET_CLS_POLICE + unsigned char police; #endif + u32 defmap; + + /* Link-sharing scheduler parameters */ long maxidle; /* Class paramters: see below. */ + long offtime; long minidle; - int filter_log; -#ifndef CBQ_NO_TRICKERY - long extradelay; -#endif + u32 avpkt; + struct qdisc_rate_table *R_tab; - long quantum; /* Allotment per WRR round */ - long rquantum; /* Relative allotment: see below */ + /* Overlimit strategy parameters */ + void (*overlimit)(struct cbq_class *cl); + long penalty; - int cell_log; - unsigned long L_tab[256]; + /* General scheduler (WRR) parameters */ + long allot; + long quantum; /* Allotment per WRR round */ + long weight; /* Relative allotment: see below */ - struct Qdisc *qdisc; /* ptr to CBQ discipline */ - struct cbq_class *root; /* Ptr to root class; - root can be not unique. 
- */ - struct cbq_class *parent; /* Ptr to parent in the class tree */ + struct Qdisc *qdisc; /* Ptr to CBQ discipline */ + struct cbq_class *split; /* Ptr to split node */ + struct cbq_class *share; /* Ptr to LS parent in the class tree */ + struct cbq_class *tparent; /* Ptr to tree parent in the class tree */ struct cbq_class *borrow; /* NULL if class is bandwidth limited; parent otherwise */ + struct cbq_class *sibling; /* Sibling chain */ + struct cbq_class *children; /* Pointer to children chain */ struct Qdisc *q; /* Elementary queueing discipline */ - struct cbq_class *next; /* next class in this priority band */ - struct cbq_class *next_alive; /* next class with backlog in this priority band */ /* Variables */ - psched_time_t last; + unsigned char cpriority; /* Effective priority */ + unsigned char delayed; + unsigned char level; /* level of the class in hierarchy: + 0 for leaf classes, and maximal + level of children + 1 for nodes. + */ + + psched_time_t last; /* Last end of service */ psched_time_t undertime; long avgidle; long deficit; /* Saved deficit for WRR */ - char awake; /* Class is in alive list */ + unsigned long penalized; + struct tc_stats stats; + struct tc_cbq_xstats xstats; -#if 0 - void (*overlimit)(struct cbq_class *cl); -#endif -}; + struct tcf_proto *filter_list; + + int refcnt; + int filters; -#define L2T(cl,len) ((cl)->L_tab[(len)>>(cl)->cell_log]) + struct cbq_class *defaults[TC_PRIO_MAX+1]; +}; struct cbq_sched_data { - struct cbq_class *classes[CBQ_MAXPRIO]; /* List of all classes */ - int nclasses[CBQ_MAXPRIO]; - unsigned quanta[CBQ_MAXPRIO]; - unsigned mtu; - int cell_log; - unsigned long L_tab[256]; - struct cbq_class *fallback_class; + struct cbq_class *classes[16]; /* Hash table of all classes */ + int nclasses[TC_CBQ_MAXPRIO+1]; + unsigned quanta[TC_CBQ_MAXPRIO+1]; + + struct cbq_class link; unsigned activemask; - struct cbq_class *active[CBQ_MAXPRIO]; /* List of all classes - with backlog */ - struct cbq_class *last_sent; - 
int last_sent_len; + struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes + with backlog */ + struct cbq_class *tx_class; + struct cbq_class *tx_borrowed; + int tx_len; psched_time_t now; /* Cached timestamp */ + unsigned pmask; + struct timer_list delay_timer; struct timer_list wd_timer; /* Wathchdog timer, that started when CBQ has backlog, but cannot transmit just now */ - unsigned long wd_expires; -#ifdef CBQ_TOPLEVEL_SHARING - struct cbq_class *borrowed; + long wd_expires; int toplevel; -#endif + u32 hgenerator; }; -/* - WRR quanta - ---------- - cl->quantum is number added to class allotment on every round. - cl->rquantum is "relative" quantum. +#define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log]) + +#define BUG_TRAP(x) if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } + + +static __inline__ unsigned cbq_hash(u32 h) +{ + h ^= h>>8; + h ^= h>>4; + return h&0xF; +} + +static __inline__ struct cbq_class * +cbq_class_lookup(struct cbq_sched_data *q, u32 classid) +{ + struct cbq_class *cl; - For real-time classes: + for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next) + if (cl->classid == classid) + return cl; + return NULL; +} - cl->quantum = (cl->rquantum*q->nclasses[prio]*q->mtu)/q->quanta[prio] +#ifdef CONFIG_NET_CLS_POLICE - where q->quanta[prio] is sum of all rquanta for given priority. - cl->rquantum can be identified with absolute rate of the class - in arbitrary units (f.e. 
bytes/sec) +static struct cbq_class * +cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) +{ + struct cbq_class *cl, *new; - In this case, delay introduced by round-robin was estimated by - Sally Floyd [2] as: + for (cl = this->tparent; cl; cl = cl->tparent) + if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this) + return new; - D = q->nclasses*q->mtu/(bandwidth/2) + return NULL; +} - Note, that D does not depend on class rate (it is very bad), - but not much worse than Gallager-Parekh estimate for CSZ - C/R = q->mtu/rate, when real-time classes have close rates. +#endif - For not real-time classes this folmula is not necessary, - so that cl->quantum can be set to any reasonable not zero value. - Apparently, it should be proportional to class rate, if the - rate is not zero. -*/ +/* Classify packet. The procedure is pretty complicated, but + it allows us to combine link sharing and priority scheduling + transparently. + + Namely, you can put link sharing rules (f.e. route based) at root of CBQ, + so that it resolves to split nodes. Then packeta are classified + by logical priority, or more specific classifier may be attached + to split node. + */ -/* - maxidle, minidle, extradelay - ---------------------------- +static struct cbq_class * +cbq_classify(struct sk_buff *skb, struct Qdisc *sch) +{ + struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data; + struct cbq_class *head = &q->link; + struct cbq_class **defmap; + struct cbq_class *cl = NULL; + u32 prio = skb->priority; + struct tcf_result res; - CBQ estimator calculates smoothed class idle time cl->aveidle, - considering class as virtual interface with corresponding bandwidth. - When cl->aveidle wants to be less than zero, class is overlimit. - When it is positive, class is underlimit. - - * maxidle bounds aveidle from above. - It controls maximal length of burst in this class after - long period of idle time. 
Burstness of active class - is controlled by filter constant cl->filter_log, - but this number is related to burst length only indirectly. - - * minidle is a negative number, normally set to zero. - Setting it to not zero value allows avgidle to drop - below zero, effectively penalizing class, when it is overlimit. - When the class load will decrease, it will take a time to - raise negative avgidle to put the class at limit. - It should be set to zero for leaf classes. - - * extradelay is penalty in delay, when a class goes overlimit. - I believe this parameter is useless and confusing. - Setting it to not zero forces class to accumulate - its "idleness" for extradelay and then send BURST of packets - until going to overlimit again. Non-sense. - - For details see [1] and [3]. - - Really, minidle and extradelay are irrelevant to real scheduling - task. As I understand, SF&VJ introduced them to experiment - with CBQ simulator in attempts to fix erratic behaviour - of ancestor-only (and, partially, top-level) algorithm. - - WARNING. - - User passes them measured in usecs, but cl->minidle, - cl->maxidle and cl->aveidle are scaled with cl->filter_log - in the text of the scheduler. -*/ + /* + * Step 1. If skb->priority points to one of our classes, use it. + */ + if (TC_H_MAJ(prio^sch->handle) == 0 && + (cl = cbq_class_lookup(q, prio)) != NULL) + return cl; + + for (;;) { + int result = 0; + + defmap = head->defaults; + + /* + * Step 2+n. Apply classifier. + */ + if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0) + goto fallback; + + if ((cl = (void*)res.class) == NULL) { + if (TC_H_MAJ(res.classid)) + cl = cbq_class_lookup(q, res.classid); + else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL) + cl = defmap[TC_PRIO_BESTEFFORT]; + + if (cl == NULL) + goto fallback; + } + + if (cl->level == 0) { +#ifdef CONFIG_NET_CLS_POLICE + if (result) + return cbq_reclassify(skb, cl); +#endif + return cl; + } + + /* + * Step 3+n. 
If classifier selected link sharing class, + * apply agency specific classifier. + * Repeat this procdure until we hit leaf node. + */ + head = cl; + } + +fallback: + cl = head; + + /* + * Step 4. No success... + */ + if (TC_H_MAJ(prio) == 0 && + !(cl = head->defaults[prio&TC_PRIO_MAX]) && + !(cl = head->defaults[TC_PRIO_BESTEFFORT])) + return head; + + return cl; +} /* A packet has just been enqueued on the empty class. - cbq_wakeup_class adds it to the tail of active class list + cbq_activate_class adds it to the tail of active class list of its priority band. */ -static __inline__ void cbq_wakeup_class(struct cbq_class *cl) +static __inline__ void cbq_activate_class(struct cbq_class *cl) { struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data; - int prio = cl->priority; + int prio = cl->cpriority; struct cbq_class *cl_tail; - cl->awake = 1; - cl_tail = q->active[prio]; q->active[prio] = cl; if (cl_tail != NULL) { cl->next_alive = cl_tail->next_alive; + cl_tail->next_alive = cl; cl->deficit = 0; } else { cl->next_alive = cl; @@ -271,58 +330,353 @@ } } +/* + Unlink class from active chain. + Note, that the same procedure is made directly in cbq_dequeue* + during round-robin procedure. 
+ */ + +static void cbq_deactivate_class(struct cbq_class *this) +{ + struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data; + int prio = this->cpriority; + struct cbq_class *cl; + struct cbq_class *cl_prev = q->active[prio]; + + do { + cl = cl_prev->next_alive; + if (cl == this) { + cl_prev->next_alive = cl->next_alive; + cl->next_alive = NULL; + + if (cl == q->active[prio]) { + q->active[prio] = cl_prev; + if (cl == q->active[prio]) { + q->active[prio] = NULL; + q->activemask &= ~(1<next_alive; + cl->deficit += cl->quantum; + return; + } + } while ((cl_prev = cl) != q->active[prio]); +} + +static __inline__ void +cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) +{ + if (q->toplevel > 0) { + psched_time_t now; + PSCHED_GET_TIME(now); + if (PSCHED_TLESS(now, q->now)) + now = q->now; + if (PSCHED_TLESS(cl->undertime, now)) { + q->toplevel = 0; + return; + } + while ((cl = cl->borrow) != NULL + && q->toplevel > cl->level) { + if (PSCHED_TLESS(cl->borrow->undertime, now)) { + q->toplevel = cl->level; + return; + } + } + } +} + static int cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; - struct cbq_class *cl = CBQ_CLASSIFIER(skb, q); + struct cbq_class *cl = cbq_classify(skb, sch); + int len = skb->len; - if (cl->q->enqueue(skb, cl->q) == 1) { + if (cl && cl->q->enqueue(skb, cl->q) == 1) { sch->q.qlen++; + sch->stats.packets++; + cl->stats.packets++; + sch->stats.bytes+=len; + cl->stats.bytes+=len; + cbq_mark_toplevel(q, cl); + if (!cl->next_alive) + cbq_activate_class(cl); + return 1; + } -#ifdef CBQ_TOPLEVEL_SHARING - if (q->toplevel > 0) { - psched_time_t now; - PSCHED_GET_TIME(now); - if (PSCHED_TLESS(cl->undertime, now)) - q->toplevel = 0; - else if (q->toplevel > 1 && cl->borrow && - PSCHED_TLESS(cl->borrow->undertime, now)) - q->toplevel = 1; - } -#endif - if (!cl->awake) - cbq_wakeup_class(cl); + sch->stats.drops++; + if (cl == NULL) + kfree_skb(skb); + else + 
cl->stats.drops++; + return 0; +} + +static int +cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct cbq_class *cl; + + if ((cl = q->tx_class) == NULL) { + kfree_skb(skb); + sch->stats.drops++; + return 0; + } + q->tx_class = NULL; + + if (cl->q->ops->requeue(skb, cl->q) == 1) { + sch->q.qlen++; + cbq_mark_toplevel(q, cl); + if (!cl->next_alive) + cbq_activate_class(cl); return 1; } + sch->stats.drops++; + cl->stats.drops++; return 0; } -static __inline__ void cbq_delay(struct cbq_sched_data *q, struct cbq_class *cl) +/* Overlimit actions */ + +/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */ + +static void cbq_ovl_classic(struct cbq_class *cl) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data; + + if (!cl->delayed) { + psched_tdiff_t delay; + + delay = PSCHED_TDIFF(cl->undertime, q->now); + delay += cl->offtime; + + /* + Class goes to sleep, so that it will have no + chance to work avgidle. Let's forgive it 8) + + BTW cbq-2.0 has a crap in this + place, apparently they forgot to shift it by cl->ewma_log. 
+ */ + if (cl->avgidle < 0) + delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); + if (cl->avgidle < cl->minidle) + cl->avgidle = cl->minidle; + if (delay < 0) + delay = 0; + PSCHED_TADD2(q->now, delay, cl->undertime); + + if (q->wd_expires == 0 || q->wd_expires > delay) + q->wd_expires = delay; + cl->xstats.overactions++; + cl->delayed = 1; + } +} + +/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when + they go overlimit + */ + +static void cbq_ovl_rclassic(struct cbq_class *cl) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data; + + while (cl && cl->delayed) { + cl = cl->borrow; + if (cl->level > q->toplevel) + return; + } + + if (cl) + cbq_ovl_classic(cl); +} + +/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */ + +static void cbq_ovl_delay(struct cbq_class *cl) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data; + + if (!cl->delayed) { + psched_tdiff_t delay; + unsigned long sched = jiffies; + + delay = PSCHED_TDIFF(cl->undertime, q->now); + delay += cl->offtime; + if (cl->avgidle < 0) + delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); + if (cl->avgidle < cl->minidle) + cl->avgidle = cl->minidle; + PSCHED_TADD2(q->now, delay, cl->undertime); + + if (delay > 0) { + sched += PSCHED_US2JIFFIE(delay) + cl->penalty; + cl->penalized = sched; + cl->cpriority = TC_CBQ_MAXPRIO; + q->pmask |= (1<delay_timer) && + (long)(q->delay_timer.expires - sched) > 0) + q->delay_timer.expires = sched; + add_timer(&q->delay_timer); + cl->delayed = 1; + cl->xstats.overactions++; + } + } +} + +/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */ + +static void cbq_ovl_lowprio(struct cbq_class *cl) { - long delay; + struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data; + + cl->penalized = jiffies + cl->penalty; + + if (cl->cpriority != cl->priority2) { + cl->cpriority = cl->priority2; + q->pmask |= (1<cpriority); + cl->xstats.overactions++; + } + 
cbq_ovl_classic(cl); +} + +/* TC_CBQ_OVL_DROP: penalize class by dropping */ - delay = PSCHED_TDIFF(cl->undertime, q->now); - if (q->wd_expires == 0 || q->wd_expires - delay > 0) - q->wd_expires = delay; +static void cbq_ovl_drop(struct cbq_class *cl) +{ + if (cl->q->ops->drop) + if (cl->q->ops->drop(cl->q)) + cl->qdisc->q.qlen--; + cl->xstats.overactions++; + cbq_ovl_classic(cl); } static void cbq_watchdog(unsigned long arg) { struct Qdisc *sch = (struct Qdisc*)arg; + qdisc_wakeup(sch->dev); +} + +static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio) +{ + struct cbq_class *cl; + struct cbq_class *cl_prev = q->active[prio]; + unsigned long now = jiffies; + unsigned long sched = now; + + if (cl_prev == NULL) + return now; + + do { + cl = cl_prev->next_alive; + if ((long)(now - cl->penalized) > 0) { + cl_prev->next_alive = cl->next_alive; + cl->next_alive = NULL; + cl->cpriority = cl->priority; + cl->delayed = 0; + cbq_activate_class(cl); + + if (cl == q->active[prio]) { + q->active[prio] = cl_prev; + if (cl == q->active[prio]) { + q->active[prio] = NULL; + return 0; + } + } + + cl = cl_prev->next_alive; + } else if ((long)(sched - cl->penalized) > 0) + sched = cl->penalized; + } while ((cl_prev = cl) != q->active[prio]); + + return (long)(sched - now); +} + +static void cbq_undelay(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc*)arg; struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data; + long delay = 0; + unsigned pmask; + + pmask = q->pmask; + q->pmask = 0; + + while (pmask) { + int prio = ffz(~pmask); + long tmp; + + pmask &= ~(1< 0) { + q->pmask |= 1<delay_timer.expires = jiffies + delay; + add_timer(&q->delay_timer); + } - q->wd_timer.expires = 0; - q->wd_timer.function = NULL; qdisc_wakeup(sch->dev); } + +#ifdef CONFIG_NET_CLS_POLICE + +static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) +{ + int len = skb->len; + struct Qdisc *sch = child->parent; + struct cbq_sched_data *q = (struct cbq_sched_data 
*)sch->data; + struct cbq_class *cl = cbq_class_lookup(q, child->classid); + + if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) { + if (cl->q->enqueue(skb, cl->q) == 1) { + sch->q.qlen++; + sch->stats.packets++; + cl->stats.packets++; + sch->stats.bytes+=len; + cl->stats.bytes+=len; + cbq_mark_toplevel(q, cl); + if (!cl->next_alive) + cbq_activate_class(cl); + return 0; + } + sch->stats.drops++; + return 0; + } + + sch->stats.drops++; + return -1; +} +#endif + +static __inline__ void +cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) +{ + if (cl && q->toplevel >= cl->level) { + if (cl->q->q.qlen <= 1 || PSCHED_TLESS(q->now, cl->undertime)) + q->toplevel = TC_CBQ_MAXLEVEL; + else /* BUGGGG? if (cl != this) */ + q->toplevel = cl->level; + } +} + static __inline__ void cbq_update(struct cbq_sched_data *q) { - struct cbq_class *cl; + struct cbq_class *cl = q->tx_class; + int len = q->tx_len; + + q->tx_class = NULL; - for (cl = q->last_sent; cl; cl = cl->parent) { + for ( ; cl; cl = cl->share) { long avgidle = cl->avgidle; long idle; @@ -333,26 +687,17 @@ idle = (now - last) - last_pktlen/rate */ - idle = PSCHED_TDIFF(q->now, cl->last) - - L2T(cl, q->last_sent_len); + idle = PSCHED_TDIFF(q->now, cl->last) - L2T(cl, len); /* true_avgidle := (1-W)*true_avgidle + W*idle, - where W=2^{-filter_log}. But cl->avgidle is scaled: + where W=2^{-ewma_log}. But cl->avgidle is scaled: cl->avgidle == true_avgidle/W, hence: */ - avgidle += idle - (avgidle>>cl->filter_log); + avgidle += idle - (avgidle>>cl->ewma_log); if (avgidle <= 0) { /* Overlimit or at-limit */ -#ifdef CBQ_NO_TRICKERY - avgidle = 0; -#else - if (avgidle < cl->minidle) - avgidle = cl->minidle; -#endif - - /* This line was missing in NS. */ cl->avgidle = avgidle; /* Calculate expected time, when this class @@ -362,29 +707,24 @@ idle = (1/W - 1)*(-true_avgidle) or idle = (1 - W)*(-cl->avgidle); + */ + idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); + /* That is not all. 
- We want to set undertime to the moment, when - the class is allowed to start next transmission i.e. - (undertime + next_pktlen/phys_bandwidth) - - now - next_pktlen/rate = idle - or - undertime = now + idle + next_pktlen/rate - - next_pktlen/phys_bandwidth - - We do not know next packet length, but can - estimate it with average packet length - or current packet_length. + To maintain rate allocated to class, + we add to undertime virtual clock, + necassry to complete transmitted packet. + (len/phys_bandwidth has been already passed + to the moment of cbq_update) */ - idle = (-avgidle) - ((-avgidle) >> cl->filter_log); - idle += L2T(q, q->last_sent_len); - idle -= L2T(cl, q->last_sent_len); + idle -= L2T(&q->link, len); + idle += L2T(cl, len); + + PSCHED_AUDIT_TDIFF(idle); + PSCHED_TADD2(q->now, idle, cl->undertime); -#ifndef CBQ_NO_TRICKERY - /* Do not forget extra delay :-) */ - PSCHED_TADD(cl->undertime, cl->extradelay); -#endif } else { /* Underlimit */ @@ -393,60 +733,44 @@ cl->avgidle = cl->maxidle; else cl->avgidle = avgidle; + } cl->last = q->now; } -#ifdef CBQ_TOPLEVEL_SHARING - cl = q->last_sent; - - if (q->borrowed && q->toplevel >= q->borrowed->level) { - if (cl->q->q.qlen <= 1 || PSCHED_TLESS(q->now, q->borrowed->undertime)) - q->toplevel = CBQ_MAXLEVEL; - else if (q->borrowed != cl) - q->toplevel = q->borrowed->level; - } -#endif - - q->last_sent = NULL; + cbq_update_toplevel(q, q->tx_borrowed); } -static __inline__ int +static __inline__ struct cbq_class * cbq_under_limit(struct cbq_class *cl) { struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data; struct cbq_class *this_cl = cl; - if (PSCHED_IS_PASTPERFECT(cl->undertime) || cl->parent == NULL) - return 1; + if (cl->tparent == NULL) + return cl; - if (PSCHED_TLESS(cl->undertime, q->now)) { - q->borrowed = cl; - return 1; + if (PSCHED_IS_PASTPERFECT(cl->undertime) || + PSCHED_TLESS(cl->undertime, q->now)) { + cl->delayed = 0; + return cl; } while (!PSCHED_IS_PASTPERFECT(cl->undertime) 
&& PSCHED_TLESS(q->now, cl->undertime)) { - cl = cl->borrow; - if (cl == NULL -#ifdef CBQ_TOPLEVEL_SHARING - || cl->level > q->toplevel -#endif - ) { -#if 0 + if ((cl = cl->borrow) == NULL || cl->level > q->toplevel) { + this_cl->stats.overlimits++; this_cl->overlimit(this_cl); -#else - cbq_delay(q, this_cl); -#endif - return 0; + return NULL; } } - q->borrowed = cl; - return 1; + this_cl->xstats.borrows++; + cl->xstats.borrows++; + return cl; } static __inline__ struct sk_buff * -cbq_dequeue_prio(struct Qdisc *sch, int prio, int fallback) +cbq_dequeue_prio(struct Qdisc *sch, int prio) { struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; struct cbq_class *cl_tail, *cl_prev, *cl; @@ -461,23 +785,14 @@ /* Start round */ do { + struct cbq_class *borrow; + /* Class is empty */ - if (cl->q->q.qlen == 0) + if (cl->q->q.qlen == 0) goto skip_class; - - if (fallback) { - /* Fallback pass: all classes are overlimit; - we send from the first class that is allowed - to borrow. - */ - if (cl->borrow == NULL) - goto skip_class; - } else { - /* Normal pass: check that class is under limit */ - if (!cbq_under_limit(cl)) - goto skip_class; - } + if ((borrow = cbq_under_limit(cl)) == NULL) + goto skip_class; if (cl->deficit <= 0) { /* Class exhausted its allotment per this @@ -496,8 +811,9 @@ goto skip_class; cl->deficit -= skb->len; - q->last_sent = cl; - q->last_sent_len = skb->len; + q->tx_class = cl; + q->tx_borrowed = borrow; + q->tx_len = skb->len; if (cl->deficit <= 0) { q->active[prio] = cl; @@ -509,10 +825,12 @@ skip_class: cl->deficit = 0; - if (cl->q->q.qlen == 0) { - /* Class is empty, declare it dead */ + if (cl->q->q.qlen == 0 || prio != cl->cpriority) { + /* Class is empty or penalized. + Unlink it from active chain. + */ cl_prev->next_alive = cl->next_alive; - cl->awake = 0; + cl->next_alive = NULL; /* Did cl_tail point to it? */ if (cl == cl_tail) { @@ -524,9 +842,17 @@ /* Kill the band! 
*/ q->active[prio] = NULL; q->activemask &= ~(1<q->q.qlen) + cbq_activate_class(cl); return NULL; } + + q->active[prio] = cl_tail; } + if (cl->q->q.qlen) + cbq_activate_class(cl); + + cl = cl_prev; } next_class: @@ -537,22 +863,22 @@ } while (deficit); q->active[prio] = cl_prev; - + return NULL; } static __inline__ struct sk_buff * -cbq_dequeue_1(struct Qdisc *sch, int fallback) +cbq_dequeue_1(struct Qdisc *sch) { struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; struct sk_buff *skb; unsigned activemask; - activemask = q->activemask; + activemask = q->activemask&0xFF; while (activemask) { int prio = ffz(~activemask); activemask &= ~(1<data; + psched_time_t now; - PSCHED_GET_TIME(q->now); + PSCHED_GET_TIME(now); - if (q->last_sent) + if (q->tx_class) { + /* Time integrator. We calculate EOS time + by adding expected packet transmittion time. + If real time is greater, we warp artificial clock, + so that: + + cbq_time = max(real_time, work); + */ + PSCHED_TADD(q->now, L2T(&q->link, q->tx_len)); + if (PSCHED_TLESS(q->now, now)) + q->now = now; cbq_update(q); + } else if (PSCHED_TLESS(q->now, now)) + q->now = now; - q->wd_expires = 0; + for (;;) { + q->wd_expires = 0; - skb = cbq_dequeue_1(sch, 0); - if (skb) - return skb; - - /* All the classes are overlimit. - Search for overlimit class, which is allowed to borrow - and use it as fallback case. - */ + skb = cbq_dequeue_1(sch); + if (skb) { + sch->q.qlen--; + return skb; + } -#ifdef CBQ_TOPLEVEL_SHARING - q->toplevel = CBQ_MAXLEVEL; -#endif + /* All the classes are overlimit. + + It is possible, if: + + 1. Scheduler is empty. + 2. Toplevel cutoff inhibited borrowing. + 3. Root class is overlimit. - skb = cbq_dequeue_1(sch, 1); - if (skb) - return skb; + Reset 2d and 3d conditions and retry. + + Note, that NS and cbq-2.0 are buggy, peeking + an arbitrary class is appropriate for ancestor-only + sharing, but not for toplevel algorithm. 
+ + Our version is better, but slower, because requires + two passes, but it is inavoidable with top-level sharing. + */ + + if (q->toplevel == TC_CBQ_MAXLEVEL && + PSCHED_IS_PASTPERFECT(q->link.undertime)) + break; + + q->toplevel = TC_CBQ_MAXLEVEL; + PSCHED_SET_PASTPERFECT(q->link.undertime); + } /* No packets in scheduler or nobody wants to give them to us :-( Sigh... start watchdog timer in the last case. */ - if (sch->q.qlen && q->wd_expires) { - if (q->wd_timer.function) + if (sch->q.qlen) { + sch->stats.overlimits++; + if (q->wd_expires && !sch->dev->tbusy) { + long delay = PSCHED_US2JIFFIE(q->wd_expires); del_timer(&q->wd_timer); - q->wd_timer.function = cbq_watchdog; - q->wd_timer.expires = jiffies + PSCHED_US2JIFFIE(q->wd_expires); - add_timer(&q->wd_timer); + if (delay <= 0) + delay = 1; + q->wd_timer.expires = jiffies + delay; + add_timer(&q->wd_timer); + } } return NULL; } @@ -606,234 +965,974 @@ static void cbq_adjust_levels(struct cbq_class *this) { - struct cbq_class *cl; + if (this == NULL) + return; - for (cl = this->parent; cl; cl = cl->parent) { - if (cl->level > this->level) - return; - cl->level = this->level + 1; - this = cl; - } + do { + int level = 0; + struct cbq_class *cl; + + if ((cl = this->children) != NULL) { + do { + if (cl->level > level) + level = cl->level; + } while ((cl = cl->sibling) != this->children); + } + this->level = level+1; + } while ((this = this->tparent) != NULL); } static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) { struct cbq_class *cl; + unsigned h; if (q->quanta[prio] == 0) return; - for (cl = q->classes[prio]; cl; cl = cl->next) { - if (cl->rquantum) - cl->quantum = (cl->rquantum*q->mtu*q->nclasses[prio])/ - q->quanta[prio]; + for (h=0; h<16; h++) { + for (cl = q->classes[h]; cl; cl = cl->next) { + /* BUGGGG... Beware! This expression suffer of + arithmetic overflows! 
+ */ + if (cl->priority == prio) { + cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ + q->quanta[prio]; + } + if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) { + printk("Damn! %08x cl->quantum==%ld\n", cl->classid, cl->quantum); + cl->quantum = 1; + } + } } } -static __inline__ int cbq_unlink_class(struct cbq_class *this) +static void cbq_sync_defmap(struct cbq_class *cl) { - struct cbq_class *cl, **clp; - struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data; + struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data; + struct cbq_class *split = cl->split; + unsigned h; + int i; - for (clp = &q->classes[this->priority]; (cl = *clp) != NULL; - clp = &cl->next) { - if (cl == this) { - *clp = cl->next; - return 0; - } + if (split == NULL) + return; + + for (i=0; i<=TC_PRIO_MAX; i++) { + if (split->defaults[i] == cl && !(cl->defmap&(1<defaults[i] = NULL; } - return -ENOENT; -} -static int cbq_prune(struct cbq_class *this) -{ - struct cbq_class *cl; - int prio = this->priority; - struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data; + for (i=0; i<=TC_PRIO_MAX; i++) { + int level = split->level; - qdisc_reset(this->q); + if (split->defaults[i]) + continue; - if (cbq_unlink_class(this)) - return -ENOENT; + for (h=0; h<16; h++) { + struct cbq_class *c; - if (this->awake) { - struct cbq_class *cl_prev = q->active[prio]; - do { - cl = cl_prev->next_alive; - if (cl == this) { - cl_prev->next_alive = cl->next_alive; - - if (cl == q->active[prio]) { - q->active[prio] = cl; - if (cl == q->active[prio]) { - q->active[prio] = NULL; - q->activemask &= ~(1<classes[h]; c; c = c->next) { + if (c->split == split && c->level < level && + c->defmap&(1<defaults[i] = c; + level = c->level; } - - cl = cl->next_alive; - cl->deficit += cl->quantum; - break; } - } while ((cl_prev = cl) != q->active[prio]); + } } +} + +static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask) +{ + struct cbq_class *split = 
NULL; - --q->nclasses[prio]; - if (this->rquantum) { - q->quanta[prio] -= this->rquantum; - cbq_normalize_quanta(q, prio); + if (splitid == 0) { + if ((split = cl->split) == NULL) + return; + splitid = split->classid; } - if (q->fallback_class == this) - q->fallback_class = NULL; + if (split == NULL || split->classid != splitid) { + for (split = cl->tparent; split; split = split->tparent) + if (split->classid == splitid) + break; + } - this->parent = NULL; - this->borrow = NULL; - this->root = this; - this->qdisc = NULL; - return 0; + if (split == NULL) + return; + + if (cl->split != split) { + cl->defmap = 0; + cbq_sync_defmap(cl); + cl->split = split; + cl->defmap = def&mask; + } else + cl->defmap = (cl->defmap&~mask)|(def&mask); + + cbq_sync_defmap(cl); } -static int cbq_graft(struct cbq_class *this, struct cbq_class *parent) +static void cbq_unlink_class(struct cbq_class *this) { struct cbq_class *cl, **clp; - int prio = this->priority; struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data; - qdisc_reset(this->q); + for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) { + if (cl == this) { + *clp = cl->next; + cl->next = NULL; + break; + } + } + if (this->tparent) { + clp=&this->sibling; + cl = *clp; + do { + if (cl == this) { + *clp = cl->sibling; + break; + } + clp = &cl->sibling; + } while ((cl = *clp) != this->sibling); - for (clp = &q->classes[prio]; (cl = *clp) != NULL; clp = &cl->next) { - if (cl == this) - return -EBUSY; + if (this->tparent->children == this) { + this->tparent->children = this->sibling; + if (this->sibling == this) + this->tparent->children = NULL; + } + } else { + BUG_TRAP(this->sibling == this); } +} + +static void cbq_link_class(struct cbq_class *this) +{ + struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data; + unsigned h = cbq_hash(this->classid); + struct cbq_class *parent = this->tparent; + + this->sibling = this; + this->next = q->classes[h]; + q->classes[h] = 
this; - cl->next = NULL; - *clp = cl; - - cl->parent = parent; - cl->borrow = parent; - cl->root = parent ? parent->root : cl; + if (parent == NULL) + return; - ++q->nclasses[prio]; - if (this->rquantum) { - q->quanta[prio] += this->rquantum; - cbq_normalize_quanta(q, prio); + if (parent->children == NULL) { + parent->children = this; + } else { + this->sibling = parent->children->sibling; + parent->children->sibling = this; } - - cbq_adjust_levels(this); +} +static int cbq_drop(struct Qdisc* sch) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct cbq_class *cl; + int h; + + for (h = TC_CBQ_MAXPRIO; h >= 0; h++) { + for (cl = q->classes[h]; cl; cl = cl->next) { + if (cl->q->ops->drop && cl->q->ops->drop(cl->q)) + return 1; + } + } return 0; } - static void cbq_reset(struct Qdisc* sch) { struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; struct cbq_class *cl; int prio; + unsigned h; q->activemask = 0; - q->last_sent = NULL; - if (q->wd_timer.function) { - del_timer(&q->wd_timer); - q->wd_timer.expires = 0; - q->wd_timer.function = NULL; - } -#ifdef CBQ_TOPLEVEL_SHARING - q->toplevel = CBQ_MAXLEVEL; -#endif + q->pmask = 0; + q->tx_class = NULL; + q->tx_borrowed = NULL; + del_timer(&q->wd_timer); + del_timer(&q->delay_timer); + q->toplevel = TC_CBQ_MAXLEVEL; - for (prio = 0; prio < CBQ_MAXPRIO; prio++) { + for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) q->active[prio] = NULL; - - for (cl = q->classes[prio]; cl; cl = cl->next) { + + for (h = 0; h < 16; h++) { + for (cl = q->classes[h]; cl; cl = cl->next) { qdisc_reset(cl->q); cl->next_alive = NULL; PSCHED_SET_PASTPERFECT(cl->undertime); cl->avgidle = 0; cl->deficit = 0; - cl->awake = 0; + cl->cpriority = cl->priority; + } + } + sch->q.qlen = 0; +} + + +static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) +{ + if (lss->change&TCF_CBQ_LSS_FLAGS) { + cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; + cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? 
NULL : cl->tparent; + } + if (lss->change&TCF_CBQ_LSS_EWMA) + cl->ewma_log = lss->ewma_log; + if (lss->change&TCF_CBQ_LSS_AVPKT) + cl->avpkt = lss->avpkt; + if (lss->change&TCF_CBQ_LSS_MINIDLE) + cl->minidle = -(long)lss->minidle; + if (lss->change&TCF_CBQ_LSS_MAXIDLE) + cl->maxidle = lss->maxidle; + if (lss->change&TCF_CBQ_LSS_OFFTIME) + cl->offtime = lss->offtime; + return 0; +} + +static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl) +{ + q->nclasses[cl->priority]--; + q->quanta[cl->priority] -= cl->weight; + cbq_normalize_quanta(q, cl->priority); +} + +static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl) +{ + q->nclasses[cl->priority]++; + q->quanta[cl->priority] += cl->weight; + cbq_normalize_quanta(q, cl->priority); +} + +static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data; + + if (wrr->allot) + cl->allot = wrr->allot; + if (wrr->weight) + cl->weight = wrr->weight; + if (wrr->priority) { + cl->priority = wrr->priority-1; + cl->cpriority = cl->priority; + if (cl->priority >= cl->priority2) + cl->priority2 = TC_CBQ_MAXPRIO-1; + } + + cbq_addprio(q, cl); + return 0; +} + +static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl) +{ + switch (ovl->strategy) { + case TC_CBQ_OVL_CLASSIC: + cl->overlimit = cbq_ovl_classic; + break; + case TC_CBQ_OVL_DELAY: + cl->overlimit = cbq_ovl_delay; + break; + case TC_CBQ_OVL_LOWPRIO: + if (ovl->priority2-1 >= TC_CBQ_MAXPRIO || + ovl->priority2-1 <= cl->priority) + return -EINVAL; + cl->priority2 = ovl->priority2-1; + cl->overlimit = cbq_ovl_lowprio; + break; + case TC_CBQ_OVL_DROP: + cl->overlimit = cbq_ovl_drop; + break; + case TC_CBQ_OVL_RCLASSIC: + cl->overlimit = cbq_ovl_rclassic; + break; + default: + return -EINVAL; + } + cl->penalty = (ovl->penalty*HZ)/1000; + return 0; +} + +#ifdef CONFIG_NET_CLS_POLICE +static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police 
*p) +{ + cl->police = p->police; + + if (!(cl->q->flags&TCQ_F_DEFAULT)) { + if (p->police == TC_POLICE_RECLASSIFY) + cl->q->reshape_fail = cbq_reshape_fail; + else + cl->q->reshape_fail = NULL; + } + return 0; +} +#endif + +static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt) +{ + cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange); + return 0; +} + +static int cbq_init(struct Qdisc *sch, struct rtattr *opt) +{ + struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data; + struct rtattr *tb[TCA_CBQ_MAX]; + struct tc_ratespec *r; + + if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 || + tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL || + RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec)) + return -EINVAL; + + if (tb[TCA_CBQ_LSSOPT-1] && + RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt)) + return -EINVAL; + + r = RTA_DATA(tb[TCA_CBQ_RATE-1]); + + MOD_INC_USE_COUNT; + if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL) { + MOD_DEC_USE_COUNT; + return -EINVAL; + } + + q->link.sibling = &q->link; + q->link.classid = sch->handle; + q->link.qdisc = sch; + if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) + q->link.q = &noop_qdisc; + + q->link.priority = TC_CBQ_MAXPRIO-1; + q->link.priority2 = TC_CBQ_MAXPRIO-1; + q->link.cpriority = TC_CBQ_MAXPRIO-1; + q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; + q->link.overlimit = cbq_ovl_classic; + q->link.allot = psched_mtu(sch->dev); + q->link.quantum = q->link.allot; + q->link.weight = q->link.R_tab->rate.rate; + + q->link.ewma_log = TC_CBQ_DEF_EWMA; + q->link.avpkt = q->link.allot/2; + q->link.minidle = -0x7FFFFFFF; + + init_timer(&q->wd_timer); + q->wd_timer.data = (unsigned long)sch; + q->wd_timer.function = cbq_watchdog; + init_timer(&q->delay_timer); + q->delay_timer.data = (unsigned long)sch; + q->delay_timer.function = cbq_undelay; + q->toplevel = TC_CBQ_MAXLEVEL; + + cbq_link_class(&q->link); + + if 
(tb[TCA_CBQ_LSSOPT-1]) + cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1])); + + cbq_addprio(q, &q->link); + return 0; +} + +#ifdef CONFIG_RTNETLINK + +static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) +{ + unsigned char *b = skb->tail; + + RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate); + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) +{ + unsigned char *b = skb->tail; + struct tc_cbq_lssopt opt; + + opt.flags = 0; + if (cl->borrow == NULL) + opt.flags |= TCF_CBQ_LSS_BOUNDED; + if (cl->share == NULL) + opt.flags |= TCF_CBQ_LSS_ISOLATED; + opt.ewma_log = cl->ewma_log; + opt.level = cl->level; + opt.avpkt = cl->avpkt; + opt.maxidle = cl->maxidle; + opt.minidle = (u32)(-cl->minidle); + opt.offtime = cl->offtime; + opt.change = ~0; + RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt); + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) +{ + unsigned char *b = skb->tail; + struct tc_cbq_wrropt opt; + + opt.flags = 0; + opt.allot = cl->allot; + opt.priority = cl->priority+1; + opt.cpriority = cl->cpriority+1; + opt.weight = cl->weight; + RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt); + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) +{ + unsigned char *b = skb->tail; + struct tc_cbq_ovl opt; + + opt.strategy = cl->ovl_strategy; + opt.priority2 = cl->priority2+1; + opt.penalty = (cl->penalty*1000)/HZ; + RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) +{ + unsigned char *b = skb->tail; + struct 
tc_cbq_fopt opt; + + if (cl->split || cl->defmap) { + opt.split = cl->split ? cl->split->classid : 0; + opt.defmap = cl->defmap; + opt.defchange = ~0; + RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt); + } + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#ifdef CONFIG_NET_CLS_POLICE +static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) +{ + unsigned char *b = skb->tail; + struct tc_cbq_police opt; + + if (cl->police) { + opt.police = cl->police; + RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt); /* the attribute slot cbq_change() reads struct tc_cbq_police from */ + } + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} +#endif + +static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) +{ + if (cbq_dump_lss(skb, cl) < 0 || + cbq_dump_rate(skb, cl) < 0 || + cbq_dump_wrr(skb, cl) < 0 || + cbq_dump_ovl(skb, cl) < 0 || +#ifdef CONFIG_NET_CLS_POLICE + cbq_dump_police(skb, cl) < 0 || +#endif + cbq_dump_fopt(skb, cl) < 0) + return -1; + return 0; +} + +static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data; + unsigned char *b = skb->tail; + struct rtattr *rta; + + rta = (struct rtattr*)b; + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + if (cbq_dump_attr(skb, &q->link) < 0) + goto rtattr_failure; + rta->rta_len = skb->tail - b; + q->link.xstats.avgidle = q->link.avgidle; + RTA_PUT(skb, TCA_XSTATS, sizeof(q->link.xstats), &q->link.xstats); + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int +cbq_dump_class(struct Qdisc *sch, unsigned long arg, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data; + struct cbq_class *cl = (struct cbq_class*)arg; + unsigned char *b = skb->tail; + struct rtattr *rta; + + if (cl->tparent) + tcm->tcm_parent = cl->tparent->classid; + else + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_handle = cl->classid; + + rta = (struct rtattr*)b; + 
RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + if (cbq_dump_attr(skb, cl) < 0) + goto rtattr_failure; + rta->rta_len = skb->tail - b; + cl->stats.qlen = cl->q->q.qlen; + RTA_PUT(skb, TCA_STATS, sizeof(cl->stats), &cl->stats); + cl->xstats.avgidle = cl->avgidle; + cl->xstats.undertime = 0; + if (!PSCHED_IS_PASTPERFECT(cl->undertime)) + cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now); + RTA_PUT(skb, TCA_XSTATS, sizeof(cl->xstats), &cl->xstats); + + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#endif + +static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old) +{ + struct cbq_class *cl = (struct cbq_class*)arg; + + if (cl) { + if (new == NULL) { + if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL) + return -ENOBUFS; + } else { +#ifdef CONFIG_NET_CLS_POLICE + if (cl->police == TC_POLICE_RECLASSIFY) + new->reshape_fail = cbq_reshape_fail; +#endif } + if ((*old = xchg(&cl->q, new)) != NULL) + qdisc_reset(*old); + + return 0; + } + return -ENOENT; +} + +static unsigned long cbq_get(struct Qdisc *sch, u32 classid) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct cbq_class *cl = cbq_class_lookup(q, classid); + + if (cl) { + cl->refcnt++; + return (unsigned long)cl; + } + return 0; +} + +static void cbq_destroy_filters(struct cbq_class *cl) +{ + struct tcf_proto *tp; + + while ((tp = cl->filter_list) != NULL) { + cl->filter_list = tp->next; + tp->ops->destroy(tp); } } +static void cbq_destroy_class(struct cbq_class *cl) +{ + cbq_destroy_filters(cl); + qdisc_destroy(cl->q); + qdisc_put_rtab(cl->R_tab); +#ifdef CONFIG_NET_ESTIMATOR + qdisc_kill_estimator(&cl->stats); +#endif +} + static void cbq_destroy(struct Qdisc* sch) { struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; - struct cbq_class *cl, **clp; - int prio; + struct cbq_class *cl; + unsigned h; + + for (h = 0; h < 16; h++) { + for (cl = q->classes[h]; cl; cl = cl->next) + 
cbq_destroy_filters(cl); + } + + for (h = 0; h < 16; h++) { + for (cl = q->classes[h]; cl; cl = cl->next) + if (cl != &q->link) + cbq_destroy_class(cl); + } + + qdisc_put_rtab(q->link.R_tab); +} + +static void cbq_put(struct Qdisc *q, unsigned long arg) +{ + struct cbq_class *cl = (struct cbq_class*)arg; + + if (--cl->refcnt == 0) + cbq_destroy_class(cl); + return; +} + +static int +cbq_change(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca, + unsigned long *arg) +{ + int err; + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct cbq_class *cl = (struct cbq_class*)*arg; + struct rtattr *opt = tca[TCA_OPTIONS-1]; + struct rtattr *tb[TCA_CBQ_MAX]; + struct cbq_class *parent; + struct qdisc_rate_table *rtab = NULL; + + if (opt==NULL || + rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt))) + return -EINVAL; + + if (tb[TCA_CBQ_OVL_STRATEGY-1] && + RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl)) + return -EINVAL; + + if (tb[TCA_CBQ_FOPT-1] && + RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt)) + return -EINVAL; + + if (tb[TCA_CBQ_RATE-1] && + RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec)) + return -EINVAL; + + if (tb[TCA_CBQ_LSSOPT-1] && + RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt)) + return -EINVAL; + + if (tb[TCA_CBQ_WRROPT-1] && + RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt)) + return -EINVAL; + +#ifdef CONFIG_NET_CLS_POLICE + if (tb[TCA_CBQ_POLICE-1] && + RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police)) + return -EINVAL; +#endif + + if (cl) { + /* Check parent */ + if (parentid) { + if (cl->tparent && cl->tparent->classid != parentid) + return -EINVAL; + if (!cl->tparent && parentid != TC_H_ROOT) + return -EINVAL; + } + + if (tb[TCA_CBQ_RATE-1]) { + rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]); + if (rtab == NULL) + return -EINVAL; + } - for (prio = 0; prio < CBQ_MAXPRIO; prio++) { - 
struct cbq_class *cl_head = q->classes[prio]; - - for (clp = &cl_head; (cl=*clp) != NULL; clp = &cl->next) { - qdisc_destroy(cl->q); - kfree(cl); + /* Change class parameters */ + start_bh_atomic(); + + if (cl->next_alive != NULL) + cbq_deactivate_class(cl); + + if (rtab) { + rtab = xchg(&cl->R_tab, rtab); + qdisc_put_rtab(rtab); + } + + if (tb[TCA_CBQ_LSSOPT-1]) + cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1])); + + if (tb[TCA_CBQ_WRROPT-1]) { + cbq_rmprio(q, cl); + cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1])); } + + if (tb[TCA_CBQ_OVL_STRATEGY-1]) + cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); + +#ifdef CONFIG_NET_CLS_POLICE + if (tb[TCA_CBQ_POLICE-1]) + cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); +#endif + + if (tb[TCA_CBQ_FOPT-1]) + cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1])); + + if (cl->q->q.qlen) + cbq_activate_class(cl); + + end_bh_atomic(); + +#ifdef CONFIG_NET_ESTIMATOR + if (tca[TCA_RATE-1]) { + qdisc_kill_estimator(&cl->stats); + qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]); + } +#endif + return 0; + } + + if (parentid == TC_H_ROOT) + return -EINVAL; + + if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL || + tb[TCA_CBQ_LSSOPT-1] == NULL) + return -EINVAL; + + rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]); + if (rtab == NULL) + return -EINVAL; + + if (classid) { + err = -EINVAL; + if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid)) + goto failure; + } else { + int i; + classid = TC_H_MAKE(sch->handle,0x8000); + + for (i=0; i<0x8000; i++) { + if (++q->hgenerator >= 0x8000) + q->hgenerator = 1; + if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) + break; + } + err = -ENOSR; + if (i >= 0x8000) + goto failure; + classid = classid|q->hgenerator; + } + + parent = &q->link; + if (parentid) { + parent = cbq_class_lookup(q, parentid); + err = -EINVAL; + if (parent == NULL) + goto failure; } + + err = -ENOBUFS; + cl = kmalloc(sizeof(*cl), GFP_KERNEL); + if (cl == NULL) + 
goto failure; + memset(cl, 0, sizeof(*cl)); + cl->R_tab = rtab; + rtab = NULL; + cl->refcnt = 1; + if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) + cl->q = &noop_qdisc; + cl->classid = classid; + cl->tparent = parent; + cl->qdisc = sch; + cl->allot = parent->allot; + cl->quantum = cl->allot; + cl->weight = cl->R_tab->rate.rate; + + start_bh_atomic(); + cbq_link_class(cl); + cl->borrow = cl->tparent; + if (cl->tparent != &q->link) + cl->share = cl->tparent; + cl->minidle = -0x7FFFFFFF; + cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1])); + cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1])); + if (cl->ewma_log==0) + cl->ewma_log = q->link.ewma_log; + if (cl->maxidle==0) + cl->maxidle = q->link.maxidle; + if (cl->avpkt==0) + cl->avpkt = q->link.avpkt; + cl->overlimit = cbq_ovl_classic; + if (tb[TCA_CBQ_OVL_STRATEGY-1]) + cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); +#ifdef CONFIG_NET_CLS_POLICE + if (tb[TCA_CBQ_POLICE-1]) + cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); +#endif + if (tb[TCA_CBQ_FOPT-1]) + cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1])); + cbq_adjust_levels(parent); + end_bh_atomic(); + +#ifdef CONFIG_NET_ESTIMATOR + if (tca[TCA_RATE-1]) + qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]); +#endif + + *arg = (unsigned long)cl; + return 0; + +failure: + qdisc_put_rtab(rtab); + return err; } -static int cbq_control(struct Qdisc *sch, void *arg) +static int cbq_delete(struct Qdisc *sch, unsigned long arg) { - struct cbq_sched_data *q; + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct cbq_class *cl = (struct cbq_class*)arg; + + if (cl->filters || cl->children || cl == &q->link) + return -EBUSY; + + start_bh_atomic(); + + if (cl->next_alive) + cbq_deactivate_class(cl); + + if (q->tx_class == cl) + q->tx_class = cl->borrow; + if (q->tx_borrowed == cl) + q->tx_borrowed = q->tx_class; + + cbq_unlink_class(cl); + cbq_adjust_levels(cl->tparent); + cl->defmap = 0; + cbq_sync_defmap(cl); - q = (struct 
cbq_sched_data *)sch->data; + cbq_rmprio(q, cl); - /* Do attachment here. It is the last thing to do. */ + if (--cl->refcnt == 0) + cbq_destroy_class(cl); - return -EINVAL; + end_bh_atomic(); + + return 0; } -static int cbq_init(struct Qdisc *sch, void *arg) +static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg) { - struct cbq_sched_data *q; - struct cbqctl *ctl = (struct cbqctl*)arg; + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct cbq_class *cl = (struct cbq_class *)arg; - q = (struct cbq_sched_data *)sch->data; - init_timer(&q->wd_timer); - q->wd_timer.data = (unsigned long)sch; -#ifdef CBQ_TOPLEVEL_SHARING - q->toplevel = CBQ_MAXLEVEL; -#endif + if (cl == NULL) + cl = &q->link; + + return &cl->filter_list; +} + +static unsigned long cbq_bind_filter(struct Qdisc *sch, u32 classid) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct cbq_class *cl = cbq_class_lookup(q, classid); + if (cl) { + cl->filters++; + return (unsigned long)cl; + } return 0; } +static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) +{ + struct cbq_class *cl = (struct cbq_class*)arg; + + cl->filters--; +} + +static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + unsigned h; + + if (arg->stop) + return; + + for (h = 0; h < 16; h++) { + struct cbq_class *cl; + + for (cl = q->classes[h]; cl; cl = cl->next) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, (unsigned long)cl, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } + } +} + +static struct Qdisc_class_ops cbq_class_ops = +{ + cbq_graft, + cbq_get, + cbq_put, + cbq_change, + cbq_delete, + cbq_walk, + + cbq_find_tcf, + cbq_bind_filter, + cbq_unbind_filter, -struct Qdisc_ops cbq_ops = +#ifdef CONFIG_RTNETLINK + cbq_dump_class, +#endif +}; + +struct Qdisc_ops cbq_qdisc_ops = { NULL, + &cbq_class_ops, "cbq", - 0, sizeof(struct 
cbq_sched_data), + cbq_enqueue, cbq_dequeue, + cbq_requeue, + cbq_drop, + + cbq_init, cbq_reset, cbq_destroy, - cbq_init, - cbq_control, + +#ifdef CONFIG_RTNETLINK + cbq_dump, +#endif }; #ifdef MODULE int init_module(void) { - int err; - - /* Load once and never free it. */ - MOD_INC_USE_COUNT; - - err = register_qdisc(&cbq_ops); - if (err) - MOD_DEC_USE_COUNT; - return err; + return register_qdisc(&cbq_qdisc_ops); } void cleanup_module(void) { + unregister_qdisc(&cbq_qdisc_ops); } #endif diff -u --recursive --new-file v2.1.98/linux/net/sched/sch_csz.c linux/net/sched/sch_csz.c --- v2.1.98/linux/net/sched/sch_csz.c Thu Feb 12 20:56:15 1998 +++ linux/net/sched/sch_csz.c Tue Apr 28 11:10:11 1998 @@ -10,6 +10,8 @@ * */ +#include +#include #include #include #include @@ -48,16 +50,16 @@ but it has pretty poor delay characteristics. Round-robin scheduling and link-sharing goals apparently contradict to minimization of network delay and jitter. - Moreover, correct handling of predicted flows seems to be + Moreover, correct handling of predictive flows seems to be impossible in CBQ. CSZ presents more precise but less flexible and less efficient approach. As I understand, the main idea is to create WFQ flows for each guaranteed service and to allocate the rest of bandwith to dummy flow-0. Flow-0 comprises - the predicted services and the best effort traffic; + the predictive services and the best effort traffic; it is handled by a priority scheduler with the highest - priority band allocated for predicted services, and the rest --- + priority band allocated for predictive services, and the rest --- to the best effort packets. Note, that in CSZ flows are NOT limited to their bandwidth. @@ -67,14 +69,16 @@ will introduce undesired delays and raise jitter. At the moment CSZ is the only scheduler that provides - real guaranteed service. Another schemes (including CBQ) + true guaranteed service. 
Other schemes (including CBQ) do not provide guaranteed delay and randomize jitter. There exists the statement (Sally Floyd), that delay can be estimated by a IntServ compliant formulae. This result is true formally, but it is wrong in principle. - At first, it ignores delays introduced by link sharing. - And the second (and main) it limits bandwidth, - it is fatal flaw. + It takes into account only round-robin delays, + ignoring delays introduced by link sharing, i.e. overlimiting. + Note that temporary overlimits are inevitable because + real links are not ideal, and a true algorithm must take them + into account. ALGORITHM. @@ -204,9 +208,8 @@ /* This number is arbitrary */ -#define CSZ_MAX_GUARANTEED 16 - -#define CSZ_FLOW_ID(skb) (CSZ_MAX_GUARANTEED) +#define CSZ_GUARANTEED 16 +#define CSZ_FLOWS (CSZ_GUARANTEED+4) struct csz_head { @@ -224,12 +227,15 @@ struct csz_head *fprev; /* Parameters */ - unsigned long rate; /* Flow rate. Fixed point is at rate_log */ - unsigned long *L_tab; /* Lookup table for L/(B*r_a) values */ - unsigned long max_bytes; /* Maximal length of queue */ + struct tc_ratespec rate; + struct tc_ratespec slice; + u32 *L_tab; /* Lookup table for L/(B*r_a) values */ + unsigned long limit; /* Maximal length of queue */ #ifdef CSZ_PLUS_TBF - unsigned long depth; /* Depth of token bucket, normalized + struct tc_ratespec peakrate; + __u32 buffer; /* Depth of token bucket, normalized as L/(B*r_a) */ + __u32 mtu; #endif /* Variables */ @@ -246,12 +252,11 @@ struct sk_buff_head q; /* FIFO queue */ }; -#define L2R(q,f,L) ((f)->L_tab[(L)>>(q)->cell_log]) +#define L2R(f,L) ((f)->L_tab[(L)>>(f)->slice.cell_log]) struct csz_sched_data { /* Parameters */ - unsigned char cell_log; /* 1< 2.1sec is MAXIMAL value */ /* Variables */ + struct tcf_proto *filter_list; + u8 prio2band[TC_PRIO_MAX+1]; #ifdef CSZ_PLUS_TBF struct timer_list wd_timer; long wd_expires; @@ -270,8 +277,8 @@ struct csz_head f; /* Flows sorted by "finish" */ struct sk_buff_head other[4];/* 
Predicted (0) and the best efforts - classes (1,2,3) */ - struct csz_flow flow[CSZ_MAX_GUARANTEED]; /* Array of flows */ + classes (1,2,3) */ + struct csz_flow flow[CSZ_GUARANTEED]; /* Array of flows */ }; /* These routines (csz_insert_finish and csz_insert_start) are @@ -353,7 +360,11 @@ It is another time consuming part, but it is impossible to avoid it. - Fixed point arithmetic is not ... does not ... Well, it is just CRAP. + It costs O(N), which makes the whole algorithm useful only + for playing with the closest approximation of the ideal fluid model. + + There exist less academic, but more practical modifications, + which might have even better characteristics (WF2Q+, HPFQ, HFSC) */ static unsigned long csz_update(struct Qdisc *sch) @@ -430,9 +441,9 @@ tmp = ((F-q->R_c)*q->rate)<R_log; R_c = F; - q->rate -= a->rate; + q->rate -= a->slice.rate; - if (delay - tmp >= 0) { + if ((long)(delay - tmp) >= 0) { delay -= tmp; continue; } @@ -443,35 +454,41 @@ return tmp; } +unsigned csz_classify(struct sk_buff *skb, struct csz_sched_data *q) +{ + return CSZ_GUARANTEED; +} + static int csz_enqueue(struct sk_buff *skb, struct Qdisc* sch) { struct csz_sched_data *q = (struct csz_sched_data *)sch->data; - unsigned flow_id = CSZ_FLOW_ID(skb); + unsigned flow_id = csz_classify(skb, q); unsigned long R; - int prio; + int prio = 0; struct csz_flow *this; - if (flow_id >= CSZ_MAX_GUARANTEED) { - prio = flow_id - CSZ_MAX_GUARANTEED; + if (flow_id >= CSZ_GUARANTEED) { + prio = flow_id - CSZ_GUARANTEED; flow_id = 0; } this = &q->flow[flow_id]; - if (this->q.qlen >= this->max_bytes || this->L_tab == NULL) { + if (this->q.qlen >= this->limit || this->L_tab == NULL) { + sch->stats.drops++; kfree_skb(skb); return 0; } R = csz_update(sch); - if (this->finish - R >= 0) { + if ((long)(this->finish - R) >= 0) { /* It was active */ - this->finish += L2R(q,this,skb->len); + this->finish += L2R(this,skb->len); } else { /* It is inactive; activate it */ - this->finish = R + L2R(q,this,skb->len); - q->rate += this->rate; + 
this->finish = R + L2R(this,skb->len); + q->rate += this->slice.rate; csz_insert_finish(&q->f, this); } @@ -486,6 +503,8 @@ else skb_queue_tail(&q->other[prio], skb); sch->q.qlen++; + sch->stats.bytes += skb->len; + sch->stats.packets++; return 1; } @@ -524,10 +543,6 @@ static void csz_watchdog(unsigned long arg) { struct Qdisc *sch = (struct Qdisc*)arg; - struct csz_sched_data *q = (struct csz_sched_data*)sch->data; - - q->wd_timer.expires = 0; - q->wd_timer.function = NULL; qdisc_wakeup(sch->dev); } @@ -568,7 +583,7 @@ if (toks >= 0) { /* Now we have enough tokens to proceed */ - this->tokens = toks <= this->depth ? toks ? this->depth; + this->tokens = toks <= this->depth ? toks : this->depth; this->t_tbf = now; if (!this->throttled) @@ -601,7 +616,7 @@ This apriory shift in R will be adjusted later to reflect real delay. We cannot avoid it because of: - throttled flow continues to be active from the viewpoint - of CSZ, so that it would acquire highest priority, + of CSZ, so that it would acquire the highest priority, if you not adjusted start numbers. - Eventually, finish number would become less than round number and flow were declared inactive. @@ -654,7 +669,7 @@ #endif if (this->q.qlen) { struct sk_buff *nskb = skb_peek(&this->q); - this->start += L2R(q,this,nskb->len); + this->start += L2R(this,nskb->len); csz_insert_start(&q->s, this); } sch->q.qlen--; @@ -668,7 +683,7 @@ if (--this->q.qlen) { struct sk_buff *nskb; - unsigned dequeued = L2R(q,this,skb->len); + unsigned dequeued = L2R(this,skb->len); /* We got not the same thing that peeked earlier; adjust start number @@ -677,7 +692,7 @@ this->start += dequeued - peeked; nskb = skb_peek_best(q); - peeked = L2R(q,this,nskb->len); + peeked = L2R(this,nskb->len); this->start += peeked; this->peeked = peeked; csz_insert_start(&q->s, this); @@ -692,11 +707,13 @@ Schedule watchdog timer, if it occured because of shaping. 
*/ if (q->wd_expires) { - if (q->wd_timer.function) - del_timer(&q->wd_timer); - q->wd_timer.function = csz_watchdog; - q->wd_timer.expires = jiffies + PSCHED_US2JIFFIE(q->wd_expires); + unsigned long delay = PSCHED_US2JIFFIE(q->wd_expires); + del_timer(&q->wd_timer); + if (delay == 0) + delay = 1; + q->wd_timer.expires = jiffies + delay; add_timer(&q->wd_timer); + sch->stats.overlimits++; } #endif return NULL; @@ -706,17 +723,14 @@ csz_reset(struct Qdisc* sch) { struct csz_sched_data *q = (struct csz_sched_data *)sch->data; - struct sk_buff *skb; int i; for (i=0; i<4; i++) - while ((skb=skb_dequeue(&q->other[i])) != NULL) - kfree_skb(skb); + skb_queue_purge(&q->other[i]); - for (i=0; iflow + i; - while ((skb = skb_dequeue(&this->q)) != NULL) - kfree_skb(skb); + skb_queue_purge(&this->q); this->snext = this->sprev = this->fnext = this->fprev = (struct csz_head*)this; this->start = this->finish = 0; @@ -727,10 +741,7 @@ #ifdef CSZ_PLUS_TBF PSCHED_GET_TIME(&q->t_tbf); q->tokens = q->depth; - if (q->wd_timer.function) { - del_timer(&q->wd_timer); - q->wd_timer.function = NULL; - } + del_timer(&q->wd_timer); #endif sch->q.qlen = 0; } @@ -738,25 +749,34 @@ static void csz_destroy(struct Qdisc* sch) { -/* - struct csz_sched_data *q = (struct csz_sched_data *)sch->data; - int i; - - for (i=0; i<4; i++) - qdisc_destroy(q->other[i]); - */ + MOD_DEC_USE_COUNT; } -static int csz_init(struct Qdisc *sch, void *arg) +static int csz_init(struct Qdisc *sch, struct rtattr *opt) { struct csz_sched_data *q = (struct csz_sched_data *)sch->data; - struct cszinitctl *ctl = (struct cszinitctl*)arg; + struct rtattr *tb[TCA_CSZ_PTAB]; + struct tc_csz_qopt *qopt; int i; + rtattr_parse(tb, TCA_CSZ_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)); + if (tb[TCA_CSZ_PARMS-1] == NULL || + RTA_PAYLOAD(tb[TCA_CSZ_PARMS-1]) < sizeof(*qopt)) + return -EINVAL; + qopt = RTA_DATA(tb[TCA_CSZ_PARMS-1]); + + q->R_log = qopt->R_log; + q->delta_log = qopt->delta_log; + for (i=0; i<=TC_PRIO_MAX; i++) { + if 
(qopt->priomap[i] >= CSZ_FLOWS) + return -EINVAL; + q->prio2band[i] = qopt->priomap[i]; + } + for (i=0; i<4; i++) skb_queue_head_init(&q->other[i]); - for (i=0; iflow + i; skb_queue_head_init(&this->q); this->snext = this->sprev = @@ -769,64 +789,268 @@ #ifdef CSZ_PLUS_TBF init_timer(&q->wd_timer); q->wd_timer.data = (unsigned long)sch; + q->wd_timer.function = csz_watchdog; +#endif + MOD_INC_USE_COUNT; + return 0; +} + +#ifdef CONFIG_RTNETLINK +static int csz_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct csz_sched_data *q = (struct csz_sched_data *)sch->data; + unsigned char *b = skb->tail; + struct rtattr *rta; + struct tc_csz_qopt opt; + + rta = (struct rtattr*)b; + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + + opt.flows = CSZ_FLOWS; + memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); + RTA_PUT(skb, TCA_CSZ_PARMS, sizeof(opt), &opt); + rta->rta_len = skb->tail - b; + + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} #endif - if (ctl) { - if (ctl->flows != CSZ_MAX_GUARANTEED) + + +static int csz_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, + struct Qdisc **old) +{ + return -EINVAL; +} + +static unsigned long csz_get(struct Qdisc *sch, u32 classid) +{ + struct csz_sched_data *q = (struct csz_sched_data *)sch->data; + unsigned long band = TC_H_MIN(classid) - 1; + + if (band >= CSZ_FLOWS) + return 0; + + if (band < CSZ_GUARANTEED && q->flow[band].L_tab == NULL) + return 0; + + return band+1; +} + +static void csz_put(struct Qdisc *sch, unsigned long cl) +{ + return; +} + +static int csz_change(struct Qdisc *sch, u32 handle, u32 parent, struct rtattr **tca, unsigned long *arg) +{ + unsigned long cl = *arg; + struct csz_sched_data *q = (struct csz_sched_data *)sch->data; + struct rtattr *opt = tca[TCA_OPTIONS-1]; + struct rtattr *tb[TCA_CSZ_PTAB]; + struct tc_csz_copt *copt; + + rtattr_parse(tb, TCA_CSZ_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)); + if (tb[TCA_CSZ_PARMS-1] == NULL || + 
RTA_PAYLOAD(tb[TCA_CSZ_PARMS-1]) < sizeof(*copt)) + return -EINVAL; + copt = RTA_DATA(tb[TCA_CSZ_PARMS-1]); + + if (tb[TCA_CSZ_RTAB-1] && + RTA_PAYLOAD(tb[TCA_CSZ_RTAB-1]) < 1024) + return -EINVAL; + + if (cl) { + struct csz_flow *a; + cl--; + if (cl >= CSZ_FLOWS) + return -ENOENT; + if (cl >= CSZ_GUARANTEED || q->flow[cl].L_tab == NULL) return -EINVAL; - q->cell_log = ctl->cell_log; + + a = &q->flow[cl]; + + start_bh_atomic(); +#if 0 + a->rate_log = copt->rate_log; +#endif +#ifdef CSZ_PLUS_TBF + a->limit = copt->limit; + a->rate = copt->rate; + a->buffer = copt->buffer; + a->mtu = copt->mtu; +#endif + + if (tb[TCA_CSZ_RTAB-1]) + memcpy(a->L_tab, RTA_DATA(tb[TCA_CSZ_RTAB-1]), 1024); + + end_bh_atomic(); + return 0; } + /* NI */ return 0; } -static int csz_control(struct Qdisc *sch, struct pschedctl *gctl) +static int csz_delete(struct Qdisc *sch, unsigned long cl) { -/* struct csz_sched_data *q = (struct csz_sched_data *)sch->data; - struct cszctl *ctl = (struct cszctl*)gctl->arg; - struct sk_buff *skb; - int i; + struct csz_flow *a; + + cl--; + + if (cl >= CSZ_FLOWS) + return -ENOENT; + if (cl >= CSZ_GUARANTEED || q->flow[cl].L_tab == NULL) + return -EINVAL; + + a = &q->flow[cl]; + + start_bh_atomic(); + a->fprev->fnext = a->fnext; + a->fnext->fprev = a->fprev; + a->sprev->snext = a->snext; + a->snext->sprev = a->sprev; + a->start = a->finish = 0; + kfree(xchg(&q->flow[cl].L_tab, NULL)); + end_bh_atomic(); - if (op == PSCHED_TC_ATTACH) { - - } -*/ return 0; } +#ifdef CONFIG_RTNETLINK +static int csz_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) +{ + struct csz_sched_data *q = (struct csz_sched_data *)sch->data; + unsigned char *b = skb->tail; + struct rtattr *rta; + struct tc_csz_copt opt; + + tcm->tcm_handle = sch->handle|cl; + + cl--; + + if (cl >= CSZ_FLOWS) /* valid bands are 0..CSZ_FLOWS-1, the same bound csz_get() enforces */ + goto rtattr_failure; + + if (cl < CSZ_GUARANTEED) { + struct csz_flow *f = &q->flow[cl]; + + if (f->L_tab == NULL) + goto rtattr_failure; + + rta = (struct 
rtattr*)b; + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + + opt.limit = f->limit; + opt.rate = f->rate; + opt.slice = f->slice; + memset(&opt.peakrate, 0, sizeof(opt.peakrate)); +#ifdef CSZ_PLUS_TBF + opt.buffer = f->buffer; + opt.mtu = f->mtu; +#else + opt.buffer = 0; + opt.mtu = 0; +#endif + + RTA_PUT(skb, TCA_CSZ_PARMS, sizeof(opt), &opt); + rta->rta_len = skb->tail - b; + } + + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} +#endif + +static void csz_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct csz_sched_data *q = (struct csz_sched_data *)sch->data; + int prio = 0; + + if (arg->stop) + return; + + for (prio = 0; prio < CSZ_FLOWS; prio++) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (prio < CSZ_GUARANTEED && q->flow[prio].L_tab == NULL) { + arg->count++; + continue; + } + if (arg->fn(sch, prio+1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static struct tcf_proto ** csz_find_tcf(struct Qdisc *sch, unsigned long cl) +{ + struct csz_sched_data *q = (struct csz_sched_data *)sch->data; + + if (cl) + return NULL; + return &q->filter_list; +} + +struct Qdisc_class_ops csz_class_ops = +{ + csz_graft, + csz_get, + csz_put, + csz_change, + csz_delete, + csz_walk, + + csz_find_tcf, + csz_get, + csz_put, +#ifdef CONFIG_RTNETLINK + csz_dump_class, +#endif +}; -struct Qdisc_ops csz_ops = +struct Qdisc_ops csz_qdisc_ops = { NULL, + &csz_class_ops, "csz", - 0, sizeof(struct csz_sched_data), + csz_enqueue, csz_dequeue, + NULL, + NULL, + + csz_init, csz_reset, csz_destroy, - csz_init, - csz_control, + +#ifdef CONFIG_RTNETLINK + csz_dump, +#endif }; #ifdef MODULE -#include int init_module(void) { - int err; - - /* Load once and never free it. 
*/ - MOD_INC_USE_COUNT; - - err = register_qdisc(&csz_ops); - if (err) - MOD_DEC_USE_COUNT; - return err; + return register_qdisc(&csz_qdisc_ops); } void cleanup_module(void) { + unregister_qdisc(&csz_qdisc_ops); } #endif diff -u --recursive --new-file v2.1.98/linux/net/sched/sch_fifo.c linux/net/sched/sch_fifo.c --- v2.1.98/linux/net/sched/sch_fifo.c Thu Feb 12 20:56:15 1998 +++ linux/net/sched/sch_fifo.c Tue Apr 28 11:10:11 1998 @@ -1,9 +1,15 @@ /* - * net/sched/sch_fifo.c Simple FIFO "scheduler" + * net/sched/sch_fifo.c The simplest FIFO queue. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. * * Authors: Alexey Kuznetsov, */ +#include #include #include #include @@ -32,9 +38,7 @@ struct fifo_sched_data { - int qmaxbytes; - int qmaxlen; - int qbytes; + unsigned limit; }; static int @@ -42,41 +46,62 @@ { struct fifo_sched_data *q = (struct fifo_sched_data *)sch->data; - if (q->qbytes <= q->qmaxbytes) { - skb_queue_tail(&sch->q, skb); - q->qbytes += skb->len; - return 0; + if (sch->stats.backlog <= q->limit) { + __skb_queue_tail(&sch->q, skb); + sch->stats.backlog += skb->len; + sch->stats.bytes += skb->len; + sch->stats.packets++; + return 1; } - kfree_skb(skb); + sch->stats.drops++; +#ifdef CONFIG_NET_CLS_POLICE + if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch)) +#endif + kfree_skb(skb); + return 0; +} + +static int +bfifo_requeue(struct sk_buff *skb, struct Qdisc* sch) +{ + __skb_queue_head(&sch->q, skb); + sch->stats.backlog += skb->len; return 1; } static struct sk_buff * bfifo_dequeue(struct Qdisc* sch) { - struct fifo_sched_data *q = (struct fifo_sched_data *)sch->data; struct sk_buff *skb; - skb = skb_dequeue(&sch->q); + skb = __skb_dequeue(&sch->q); if (skb) - q->qbytes -= skb->len; + sch->stats.backlog -= skb->len; return skb; } -static 
void -bfifo_reset(struct Qdisc* sch) +static int +fifo_drop(struct Qdisc* sch) { - struct fifo_sched_data *q = (struct fifo_sched_data *)sch->data; struct sk_buff *skb; - while((skb=skb_dequeue(&sch->q)) != NULL) { - q->qbytes -= skb->len; + skb = __skb_dequeue_tail(&sch->q); + if (skb) { + sch->stats.backlog -= skb->len; kfree_skb(skb); + return 1; } - if (q->qbytes) { - printk("fifo_reset: qbytes=%d\n", q->qbytes); - q->qbytes = 0; - } + return 0; +} + +static void +fifo_reset(struct Qdisc* sch) +{ + struct sk_buff *skb; + + while ((skb=__skb_dequeue(&sch->q)) != NULL) + kfree_skb(skb); + sch->stats.backlog = 0; } static int @@ -84,96 +109,106 @@ { struct fifo_sched_data *q = (struct fifo_sched_data *)sch->data; - if (sch->q.qlen <= q->qmaxlen) { - skb_queue_tail(&sch->q, skb); - return 0; + if (sch->q.qlen <= q->limit) { + __skb_queue_tail(&sch->q, skb); + sch->stats.bytes += skb->len; + sch->stats.packets++; + return 1; } - kfree_skb(skb); + sch->stats.drops++; +#ifdef CONFIG_NET_CLS_POLICE + if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch)) +#endif + kfree_skb(skb); + return 0; +} + +static int +pfifo_requeue(struct sk_buff *skb, struct Qdisc* sch) +{ + __skb_queue_head(&sch->q, skb); return 1; } + static struct sk_buff * pfifo_dequeue(struct Qdisc* sch) { - return skb_dequeue(&sch->q); + return __skb_dequeue(&sch->q); } -static void -pfifo_reset(struct Qdisc* sch) + +static int fifo_init(struct Qdisc *sch, struct rtattr *opt) { - struct sk_buff *skb; + struct fifo_sched_data *q = (void*)sch->data; - while((skb=skb_dequeue(&sch->q))!=NULL) - kfree_skb(skb); + if (opt == NULL) { + q->limit = sch->dev->tx_queue_len; + if (sch->ops == &bfifo_qdisc_ops) + q->limit *= sch->dev->mtu; + } else { + struct tc_fifo_qopt *ctl = RTA_DATA(opt); + if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) + return -EINVAL; + q->limit = ctl->limit; + } + return 0; } - -static int fifo_init(struct Qdisc *sch, void *arg /* int bytes, int pkts */) +#ifdef CONFIG_RTNETLINK +static 
int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) { - struct fifo_sched_data *q; -/* - struct device *dev = sch->dev; - */ + struct fifo_sched_data *q = (void*)sch->data; + unsigned char *b = skb->tail; + struct tc_fifo_qopt opt; - q = (struct fifo_sched_data *)sch->data; -/* - if (pkts<0) - pkts = dev->tx_queue_len; - if (bytes<0) - bytes = pkts*dev->mtu; - q->qmaxbytes = bytes; - q->qmaxlen = pkts; - */ - return 0; + opt.limit = q->limit; + RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; } +#endif -struct Qdisc_ops pfifo_ops = +struct Qdisc_ops pfifo_qdisc_ops = { NULL, + NULL, "pfifo", - 0, sizeof(struct fifo_sched_data), + pfifo_enqueue, pfifo_dequeue, - pfifo_reset, - NULL, + pfifo_requeue, + fifo_drop, + fifo_init, + fifo_reset, + NULL, +#ifdef CONFIG_RTNETLINK + fifo_dump, +#endif }; -struct Qdisc_ops bfifo_ops = +struct Qdisc_ops bfifo_qdisc_ops = { NULL, - "pfifo", - 0, + NULL, + "bfifo", sizeof(struct fifo_sched_data), + bfifo_enqueue, bfifo_dequeue, - bfifo_reset, - NULL, - fifo_init, -}; + bfifo_requeue, + fifo_drop, -#ifdef MODULE -#include -int init_module(void) -{ - int err; - - /* Load once and never free it. */ - MOD_INC_USE_COUNT; - - err = register_qdisc(&pfifo_ops); - if (err == 0) { - err = register_qdisc(&bfifo_ops); - if (err) - unregister_qdisc(&pfifo_ops); - } - if (err) - MOD_DEC_USE_COUNT; - return err; -} - -void cleanup_module(void) -{ -} + fifo_init, + fifo_reset, + NULL, +#ifdef CONFIG_RTNETLINK + fifo_dump, #endif +}; diff -u --recursive --new-file v2.1.98/linux/net/sched/sch_generic.c linux/net/sched/sch_generic.c --- v2.1.98/linux/net/sched/sch_generic.c Thu Feb 12 20:56:15 1998 +++ linux/net/sched/sch_generic.c Tue Apr 28 11:10:11 1998 @@ -30,66 +30,116 @@ #include #include +#define BUG_TRAP(x) if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } + +/* Main transmission queue. 
*/ + struct Qdisc_head qdisc_head = { &qdisc_head }; -static struct Qdisc_ops *qdisc_base = NULL; +/* Kick device. + Note, that this procedure can be called by watchdog timer, so that + we do not check dev->tbusy flag here. + + Returns: 0 - queue is empty. + >0 - queue is not empty, but throttled. + <0 - queue is not empty. Device is throttled, if dev->tbusy != 0. + + NOTE: Called only from NET BH +*/ -static int default_requeue(struct sk_buff *skb, struct Qdisc* qdisc); +int qdisc_restart(struct device *dev) +{ + struct Qdisc *q = dev->qdisc; + struct sk_buff *skb; + if ((skb = q->dequeue(q)) != NULL) { + if (netdev_nit) + dev_queue_xmit_nit(skb, dev); -/* NOTES. + if (dev->hard_start_xmit(skb, dev) == 0) { + q->tx_last = jiffies; + return -1; + } - Every discipline has two major routines: enqueue and dequeue. + /* Device kicked us out :( + It is possible in three cases: - ---dequeue + 1. fastroute is enabled + 2. device cannot determine busy state + before start of transmission (f.e. dialout) + 3. device is buggy (ppp) + */ - dequeue usually returns a skb to send. It is allowed to return NULL, - but it does not mean that queue is empty, it just means that - discipline does not want to send anything this time. - Queue is really empty if q->q.qlen == 0. - For complicated disciplines with multiple queues q->q is not - real packet queue, but however q->q.qlen must be valid. + q->ops->requeue(skb, q); + return -1; + } + return q->q.qlen; +} - ---enqueue +/* Scan transmission queue and kick devices. - enqueue returns number of enqueued packets i.e. this number is 1, - if packet was enqueued sucessfully and <1 if something (not - necessary THIS packet) was dropped. + Deficiency: slow devices (ppp) and fast ones (100Mb ethernet) + share one queue. It means, that if we have a lot of loaded ppp channels, + we will scan a long list on every 100Mb EOI. + I have no idea how to solve it using only "anonymous" Linux mark_bh(). + To change queue from device interrupt? Ough... 
only not this... */ -int register_qdisc(struct Qdisc_ops *qops) +void qdisc_run_queues(void) { - struct Qdisc_ops *q, **qp; - for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) - if (strcmp(qops->id, q->id) == 0) - return -EEXIST; - qops->next = NULL; - qops->refcnt = 0; - *qp = qops; - return 0; -} + struct Qdisc_head **hp, *h; -int unregister_qdisc(struct Qdisc_ops *qops) -{ - struct Qdisc_ops *q, **qp; - for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) - if (q == qops) - break; - if (!q) - return -ENOENT; - if (q->requeue == NULL) - q->requeue = default_requeue; - *qp = q->next; - return 0; + hp = &qdisc_head.forw; + while ((h = *hp) != &qdisc_head) { + int res = -1; + struct Qdisc *q = (struct Qdisc*)h; + struct device *dev = q->dev; + + while (!dev->tbusy && (res = qdisc_restart(dev)) < 0) + /* NOTHING */; + + /* The explanation is necessary here. + qdisc_restart called dev->hard_start_xmit, + if device is virtual, it could trigger one more + dev_queue_xmit and new device could appear + in active chain. In this case we cannot unlink + empty queue, because we lost back pointer. + No problem, we will unlink it during the next round. + */ + + if (res == 0 && *hp == h) { + *hp = h->forw; + h->forw = NULL; + continue; + } + hp = &h->forw; + } } -struct Qdisc *qdisc_lookup(int handle) +/* Periodic watchdoc timer to recover of hard/soft device bugs. */ + +static void dev_do_watchdog(unsigned long dummy); + +static struct timer_list dev_watchdog = + { NULL, NULL, 0L, 0L, &dev_do_watchdog }; + +static void dev_do_watchdog(unsigned long dummy) { - return NULL; + struct Qdisc_head *h; + + for (h = qdisc_head.forw; h != &qdisc_head; h = h->forw) { + struct Qdisc *q = (struct Qdisc*)h; + struct device *dev = q->dev; + if (dev->tbusy && jiffies - q->tx_last > q->tx_timeo) + qdisc_restart(dev); + } + dev_watchdog.expires = jiffies + 5*HZ; + add_timer(&dev_watchdog); } + /* "NOOP" scheduler: the best scheduler, recommended for all interfaces in all curcumstances. 
It is difficult to invent anything more fast or cheap. @@ -108,11 +158,48 @@ return NULL; } +static int +noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc) +{ + if (net_ratelimit()) + printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name); + kfree_skb(skb); + return 0; +} + +struct Qdisc_ops noop_qdisc_ops = +{ + NULL, + NULL, + "noop", + 0, + + noop_enqueue, + noop_dequeue, + noop_requeue, +}; + struct Qdisc noop_qdisc = { { NULL }, noop_enqueue, noop_dequeue, + TCQ_F_DEFAULT|TCQ_F_BUILTIN, + &noop_qdisc_ops, +}; + + +struct Qdisc_ops noqueue_qdisc_ops = +{ + NULL, + NULL, + "noqueue", + 0, + + noop_enqueue, + noop_dequeue, + noop_requeue, + }; struct Qdisc noqueue_qdisc = @@ -120,25 +207,32 @@ { NULL }, NULL, NULL, + TCQ_F_DEFAULT|TCQ_F_BUILTIN, + &noqueue_qdisc_ops, }; +static const u8 prio2band[TC_PRIO_MAX+1] = +{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; -/* 3-band FIFO queue: old style, but should be a bit faster (several CPU insns) */ +/* 3-band FIFO queue: old style, but should be a bit faster than + generic prio+fifo combination. 
+ */ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) { - const static u8 prio2band[8] = { 1, 2, 2, 2, 1, 2, 0, 0 }; struct sk_buff_head *list; - list = ((struct sk_buff_head*)qdisc->data) + prio2band[skb->priority&7]; + list = ((struct sk_buff_head*)qdisc->data) + + prio2band[skb->priority&TC_PRIO_MAX]; if (list->qlen <= skb->dev->tx_queue_len) { __skb_queue_tail(list, skb); + qdisc->q.qlen++; return 1; } - qdisc->dropped++; + qdisc->stats.drops++; kfree_skb(skb); return 0; } @@ -152,8 +246,10 @@ for (prio = 0; prio < 3; prio++, list++) { skb = __skb_dequeue(list); - if (skb) + if (skb) { + qdisc->q.qlen--; return skb; + } } return NULL; } @@ -161,12 +257,13 @@ static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) { - const static u8 prio2band[8] = { 1, 2, 2, 2, 1, 2, 0, 0 }; struct sk_buff_head *list; - list = ((struct sk_buff_head*)qdisc->data) + prio2band[skb->priority&7]; + list = ((struct sk_buff_head*)qdisc->data) + + prio2band[skb->priority&TC_PRIO_MAX]; __skb_queue_head(list, skb); + qdisc->q.qlen++; return 1; } @@ -178,16 +275,17 @@ for (prio=0; prio < 3; prio++) skb_queue_purge(list+prio); + qdisc->q.qlen = 0; } -static int pfifo_fast_init(struct Qdisc *qdisc, void *arg) +static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt) { int i; struct sk_buff_head *list; list = ((struct sk_buff_head*)qdisc->data); - for(i=0; i<3; i++) + for (i=0; i<3; i++) skb_queue_head_init(list+i); return 0; @@ -196,29 +294,20 @@ static struct Qdisc_ops pfifo_fast_ops = { NULL, + NULL, "pfifo_fast", - 1, 3 * sizeof(struct sk_buff_head), + pfifo_fast_enqueue, pfifo_fast_dequeue, - pfifo_fast_reset, + pfifo_fast_requeue, NULL, + pfifo_fast_init, - NULL, - pfifo_fast_requeue + pfifo_fast_reset, }; -static int -default_requeue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - if (net_ratelimit()) - printk(KERN_DEBUG "%s deferred output. 
It is buggy.\n", skb->dev->name); - kfree_skb(skb); - return 0; -} - -static struct Qdisc * -qdisc_alloc(struct device *dev, struct Qdisc_ops *ops, void *arg) +struct Qdisc * qdisc_create_dflt(struct device *dev, struct Qdisc_ops *ops) { struct Qdisc *sch; int size = sizeof(*sch) + ops->priv_size; @@ -233,56 +322,48 @@ sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev = dev; - if (ops->init && ops->init(sch, arg)) - return NULL; - ops->refcnt++; - return sch; + sch->flags |= TCQ_F_DEFAULT; + if (ops->init && ops->init(sch, NULL) == 0) + return sch; + + kfree(sch); + return NULL; } void qdisc_reset(struct Qdisc *qdisc) { struct Qdisc_ops *ops = qdisc->ops; - if (ops) { - start_bh_atomic(); - if (ops->reset) - ops->reset(qdisc); - end_bh_atomic(); - } + start_bh_atomic(); + if (ops->reset) + ops->reset(qdisc); + end_bh_atomic(); } void qdisc_destroy(struct Qdisc *qdisc) { struct Qdisc_ops *ops = qdisc->ops; - if (ops) { - start_bh_atomic(); - if (ops->reset) - ops->reset(qdisc); - if (ops->destroy) - ops->destroy(qdisc); - ops->refcnt--; - end_bh_atomic(); - kfree(qdisc); - } -} - -static void dev_do_watchdog(unsigned long dummy); - -static struct timer_list dev_watchdog = - { NULL, NULL, 0L, 0L, &dev_do_watchdog }; - -static void dev_do_watchdog(unsigned long dummy) -{ - struct Qdisc_head *h; - - for (h = qdisc_head.forw; h != &qdisc_head; h = h->forw) { - struct Qdisc *q = (struct Qdisc*)h; - struct device *dev = q->dev; - if (dev->tbusy && jiffies - q->tx_last > q->tx_timeo) { - qdisc_restart(dev); - } +#ifdef CONFIG_NET_SCHED + if (qdisc->dev) { + struct Qdisc *q, **qp; + for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) + if (q == qdisc) { + *qp = q->next; + q->next = NULL; + break; + } } - dev_watchdog.expires = jiffies + 5*HZ; - add_timer(&dev_watchdog); +#ifdef CONFIG_NET_ESTIMATOR + qdisc_kill_estimator(&qdisc->stats); +#endif +#endif + start_bh_atomic(); + if (ops->reset) + ops->reset(qdisc); + if (ops->destroy) + 
ops->destroy(qdisc); + end_bh_atomic(); + if (!(qdisc->flags&TCQ_F_BUILTIN)) + kfree(qdisc); } @@ -291,15 +372,17 @@ /* No queueing discipline is attached to device; create default one i.e. pfifo_fast for devices, which need queueing and noqueue_qdisc for - virtual intrfaces + virtual interfaces */ if (dev->qdisc_sleeping == &noop_qdisc) { if (dev->tx_queue_len) { struct Qdisc *qdisc; - qdisc = qdisc_alloc(dev, &pfifo_fast_ops, NULL); - if (qdisc == NULL) + qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops); + if (qdisc == NULL) { + printk(KERN_INFO "%s: activation failed\n", dev->name); return; + } dev->qdisc_sleeping = qdisc; } else dev->qdisc_sleeping = &noqueue_qdisc; @@ -309,10 +392,9 @@ if ((dev->qdisc = dev->qdisc_sleeping) != &noqueue_qdisc) { dev->qdisc->tx_timeo = 5*HZ; dev->qdisc->tx_last = jiffies - dev->qdisc->tx_timeo; - if (!dev_watchdog.expires) { + if (!del_timer(&dev_watchdog)) dev_watchdog.expires = jiffies + 5*HZ; - add_timer(&dev_watchdog); - } + add_timer(&dev_watchdog); } end_bh_atomic(); } @@ -323,8 +405,7 @@ start_bh_atomic(); - qdisc = dev->qdisc; - dev->qdisc = &noop_qdisc; + qdisc = xchg(&dev->qdisc, &noop_qdisc); qdisc_reset(qdisc); @@ -346,6 +427,7 @@ { dev->qdisc = &noop_qdisc; dev->qdisc_sleeping = &noop_qdisc; + dev->qdisc_list = NULL; } void dev_shutdown(struct device *dev) @@ -354,12 +436,15 @@ start_bh_atomic(); qdisc = dev->qdisc_sleeping; + dev->qdisc = &noop_qdisc; dev->qdisc_sleeping = &noop_qdisc; - qdisc_destroy(qdisc); + qdisc_destroy(qdisc); + BUG_TRAP(dev->qdisc_list == NULL); + dev->qdisc_list = NULL; end_bh_atomic(); } -void dev_set_scheduler(struct device *dev, struct Qdisc *qdisc) +struct Qdisc * dev_set_scheduler(struct device *dev, struct Qdisc *qdisc) { struct Qdisc *oqdisc; @@ -369,195 +454,20 @@ start_bh_atomic(); oqdisc = dev->qdisc_sleeping; - /* Destroy old scheduler */ + /* Prune old scheduler */ if (oqdisc) - qdisc_destroy(oqdisc); + qdisc_reset(oqdisc); - /* ... and attach new one */ + /* ... 
and graft new one */ + if (qdisc == NULL) + qdisc = &noop_qdisc; dev->qdisc_sleeping = qdisc; dev->qdisc = &noop_qdisc; end_bh_atomic(); if (dev->flags & IFF_UP) dev_activate(dev); -} - -/* Kick the queue "q". - Note, that this procedure is called by watchdog timer, so that - we do not check dev->tbusy flag here. - - Returns: 0 - queue is empty. - >0 - queue is not empty, but throttled. - <0 - queue is not empty. Device is throttled, if dev->tbusy != 0. - - NOTE: Called only from NET BH -*/ - - -int qdisc_restart(struct device *dev) -{ - struct Qdisc *q = dev->qdisc; - struct sk_buff *skb; - - if ((skb = q->dequeue(q)) != NULL) { - if (netdev_nit) - dev_queue_xmit_nit(skb, dev); - - if (dev->hard_start_xmit(skb, dev) == 0) { - q->tx_last = jiffies; - return -1; - } - - if (q->ops) { - q->ops->requeue(skb, q); - return -1; - } - printk(KERN_DEBUG "%s: it is impossible!!!\n", dev->name); - kfree_skb(skb); - } - return q->q.qlen; + return oqdisc; } -void qdisc_run_queues(void) -{ - struct Qdisc_head **hp, *h; - - hp = &qdisc_head.forw; - while ((h = *hp) != &qdisc_head) { - int res = -1; - struct Qdisc *q = (struct Qdisc*)h; - struct device *dev = q->dev; - - while (!dev->tbusy && (res = qdisc_restart(dev)) < 0) - /* NOTHING */; - - /* The explanation is necessary here. - qdisc_restart called dev->hard_start_xmit, - if device is virtual, it could trigger one more - dev_queue_xmit and new device could appear - in active chain. In this case we cannot unlink - empty queue, because we lost back pointer. - No problem, we will unlink it during the next round. 
- */ - - if (res == 0 && *hp == h) { - *hp = h->forw; - h->forw = NULL; - continue; - } - hp = &h->forw; - } -} - - -int tc_init(struct pschedctl *pctl) -{ - struct Qdisc *q; - struct Qdisc_ops *qops; - - if (pctl->handle) { - q = qdisc_lookup(pctl->handle); - if (q == NULL) - return -ENOENT; - qops = q->ops; - if (pctl->ifindex && q->dev->ifindex != pctl->ifindex) - return -EINVAL; - } - return -EINVAL; -} - -int tc_destroy(struct pschedctl *pctl) -{ - return -EINVAL; -} - -int tc_attach(struct pschedctl *pctl) -{ - return -EINVAL; -} - -int tc_detach(struct pschedctl *pctl) -{ - return -EINVAL; -} - - -int psched_ioctl(void *arg) -{ - struct pschedctl ctl; - struct pschedctl *pctl = &ctl; - int err; - - if (copy_from_user(&ctl, arg, sizeof(ctl))) - return -EFAULT; - - if (ctl.arglen > 0) { - pctl = kmalloc(sizeof(ctl) + ctl.arglen, GFP_KERNEL); - if (pctl == NULL) - return -ENOBUFS; - memcpy(pctl, &ctl, sizeof(ctl)); - if (copy_from_user(pctl->args, ((struct pschedctl*)arg)->args, ctl.arglen)) { - kfree(pctl); - return -EFAULT; - } - } - - rtnl_lock(); - - switch (ctl.command) { - case PSCHED_TC_INIT: - err = tc_init(pctl); - break; - case PSCHED_TC_DESTROY: - err = tc_destroy(pctl); - break; - case PSCHED_TC_ATTACH: - err = tc_attach(pctl); - break; - case PSCHED_TC_DETACH: - err = tc_detach(pctl); - break; - default: - err = -EINVAL; - } - - rtnl_unlock(); - - if (pctl != &ctl) - kfree(pctl); - return err; -} - -__initfunc(int pktsched_init(void)) -{ -#define INIT_QDISC(name) { \ - extern struct Qdisc_ops name##_ops; \ - register_qdisc(&##name##_ops); \ - } - - register_qdisc(&pfifo_fast_ops); -#ifdef CONFIG_NET_SCH_CBQ - INIT_QDISC(cbq); -#endif -#ifdef CONFIG_NET_SCH_CSZ - INIT_QDISC(csz); -#endif -#ifdef CONFIG_NET_SCH_RED - INIT_QDISC(red); -#endif -#ifdef CONFIG_NET_SCH_SFQ - INIT_QDISC(sfq); -#endif -#ifdef CONFIG_NET_SCH_TBF - INIT_QDISC(tbf); -#endif -#ifdef CONFIG_NET_SCH_PFIFO - INIT_QDISC(pfifo); - INIT_QDISC(bfifo); -#endif -#ifdef 
CONFIG_NET_SCH_PRIO - INIT_QDISC(prio); -#endif - return 0; -} diff -u --recursive --new-file v2.1.98/linux/net/sched/sch_prio.c linux/net/sched/sch_prio.c --- v2.1.98/linux/net/sched/sch_prio.c Sun Nov 30 14:00:40 1997 +++ linux/net/sched/sch_prio.c Tue Apr 28 11:10:11 1998 @@ -1,9 +1,16 @@ /* * net/sched/sch_prio.c Simple 3-band priority "scheduler". * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * * Authors: Alexey Kuznetsov, */ +#include +#include #include #include #include @@ -28,32 +35,69 @@ #include #include -/* New N-band generic scheduler */ struct prio_sched_data { - int qbytes; int bands; - u8 prio2band[8]; - struct Qdisc *queues[8]; + struct tcf_proto *filter_list; + u8 prio2band[TC_PRIO_MAX+1]; + struct Qdisc *queues[TCQ_PRIO_BANDS]; }; + +static __inline__ unsigned prio_classify(struct sk_buff *skb, struct Qdisc *sch) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + struct tcf_result res; + + res.classid = skb->priority; + if (TC_H_MAJ(res.classid) != sch->handle) { + if (!q->filter_list || tc_classify(skb, q->filter_list, &res)) { + if (TC_H_MAJ(res.classid)) + res.classid = 0; + res.classid = q->prio2band[res.classid&TC_PRIO_MAX] + 1; + } + } + + return res.classid - 1; +} + static int prio_enqueue(struct sk_buff *skb, struct Qdisc* sch) { struct prio_sched_data *q = (struct prio_sched_data *)sch->data; - int prio = q->prio2band[skb->priority&7]; struct Qdisc *qdisc; - qdisc = q->queues[prio]; - if (qdisc->enqueue(skb, qdisc) == 0) { - q->qbytes += skb->len; + qdisc = q->queues[prio_classify(skb, sch)]; + + if (qdisc->enqueue(skb, qdisc) == 1) { + sch->stats.bytes += skb->len; + sch->stats.packets++; sch->q.qlen++; - return 0; + return 1; } - return 1; + sch->stats.drops++; + return 0; } + +static int 
+prio_requeue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + struct Qdisc *qdisc; + + qdisc = q->queues[prio_classify(skb, sch)]; + + if (qdisc->ops->requeue(skb, qdisc) == 1) { + sch->q.qlen++; + return 1; + } + sch->stats.drops++; + return 0; +} + + static struct sk_buff * prio_dequeue(struct Qdisc* sch) { @@ -66,7 +110,6 @@ qdisc = q->queues[prio]; skb = qdisc->dequeue(qdisc); if (skb) { - q->qbytes -= skb->len; sch->q.qlen--; return skb; } @@ -75,6 +118,24 @@ } +static int +prio_drop(struct Qdisc* sch) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + int prio; + struct Qdisc *qdisc; + + for (prio = q->bands-1; prio >= 0; prio--) { + qdisc = q->queues[prio]; + if (qdisc->ops->drop(qdisc)) { + sch->q.qlen--; + return 1; + } + } + return 0; +} + + static void prio_reset(struct Qdisc* sch) { @@ -83,7 +144,7 @@ for (prio=0; priobands; prio++) qdisc_reset(q->queues[prio]); - q->qbytes = 0; + sch->q.qlen = 0; } static void @@ -96,51 +157,205 @@ qdisc_destroy(q->queues[prio]); q->queues[prio] = &noop_qdisc; } + MOD_DEC_USE_COUNT; } -static int prio_init(struct Qdisc *sch, void *arg) +static int prio_init(struct Qdisc *sch, struct rtattr *opt) { - const static u8 prio2band[8] = { 1, 2, 2, 2, 1, 2, 0, 0 }; - struct prio_sched_data *q; + static const u8 prio2band[TC_PRIO_MAX+1] = + { 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }; + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + unsigned mask = 0; int i; - q = (struct prio_sched_data *)sch->data; - q->bands = 3; - memcpy(q->prio2band, prio2band, sizeof(prio2band)); - for (i=0; ibands; i++) - q->queues[i] = &noop_qdisc; + if (opt == NULL) { + q->bands = 3; + memcpy(q->prio2band, prio2band, sizeof(prio2band)); + mask = 7; + } else { + struct tc_prio_qopt *qopt = RTA_DATA(opt); + + if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) + return -EINVAL; + if (qopt->bands > TCQ_PRIO_BANDS) + return -EINVAL; + q->bands = 
qopt->bands; + for (i=0; i<=TC_PRIO_MAX; i++) { + if (qopt->priomap[i] >= q->bands) + return -EINVAL; + q->prio2band[i] = qopt->priomap[i]; + mask |= (1<priomap[i]); + } + } + for (i=0; iqueues[i] = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + if (q->queues[i] == NULL) + q->queues[i] = &noop_qdisc; + } + MOD_INC_USE_COUNT; + return 0; +} + +#ifdef CONFIG_RTNETLINK +static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + unsigned char *b = skb->tail; + struct tc_prio_qopt opt; + + opt.bands = q->bands; + memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); + RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} +#endif + +static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + unsigned long band = arg - 1; + + if (band >= q->bands) + return -EINVAL; + + if (new == NULL) + new = &noop_qdisc; + + *old = xchg(&q->queues[band], new); + return 0; } -struct Qdisc_ops prio_ops = +static unsigned long prio_get(struct Qdisc *sch, u32 classid) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + unsigned long band = TC_H_MIN(classid); + + if (band - 1 >= q->bands) + return 0; + return band; +} + +static void prio_put(struct Qdisc *q, unsigned long cl) +{ + return; +} + +static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct rtattr **tca, unsigned long *arg) +{ + unsigned long cl = *arg; + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + + if (cl - 1 > q->bands) + return -ENOENT; + return 0; +} + +static int prio_delete(struct Qdisc *sch, unsigned long cl) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + if (cl - 1 > q->bands) + return -ENOENT; + return 0; +} + + +#ifdef CONFIG_RTNETLINK +static int prio_dump_class(struct Qdisc 
*sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + + if (cl - 1 > q->bands) + return -ENOENT; + return 0; +} +#endif + +static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + int prio; + + if (arg->stop) + return; + + for (prio = 0; prio < q->bands; prio++) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, prio+1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + + if (cl) + return NULL; + return &q->filter_list; +} + +static struct Qdisc_class_ops prio_class_ops = +{ + prio_graft, + prio_get, + prio_put, + prio_change, + prio_delete, + prio_walk, + + prio_find_tcf, + prio_get, + prio_put, + +#ifdef CONFIG_RTNETLINK + prio_dump_class, +#endif +}; + +struct Qdisc_ops prio_qdisc_ops = { NULL, + &prio_class_ops, "prio", - 0, sizeof(struct prio_sched_data), + prio_enqueue, prio_dequeue, + prio_requeue, + prio_drop, + + prio_init, prio_reset, prio_destroy, - prio_init, + +#ifdef CONFIG_RTNETLINK + prio_dump, +#endif }; #ifdef MODULE -#include + int init_module(void) { - int err; - - /* Load once and never free it. */ - MOD_INC_USE_COUNT; - - err = register_qdisc(&prio_ops); - if (err) - MOD_DEC_USE_COUNT; - return err; + return register_qdisc(&prio_qdisc_ops); } void cleanup_module(void) { + unregister_qdisc(&prio_qdisc_ops); } + #endif diff -u --recursive --new-file v2.1.98/linux/net/sched/sch_red.c linux/net/sched/sch_red.c --- v2.1.98/linux/net/sched/sch_red.c Thu Feb 12 20:56:15 1998 +++ linux/net/sched/sch_red.c Tue Apr 28 11:10:11 1998 @@ -1,5 +1,5 @@ /* - * net/sched/sch_red.c Random Early Detection scheduler. + * net/sched/sch_red.c Random Early Detection queue. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -9,6 +9,8 @@ * Authors: Alexey Kuznetsov, */ +#include +#include #include #include #include @@ -62,32 +64,42 @@ and mark (drop) packet with this probability. Pb changes from 0 (at avg==th_min) to max_P (avg==th_max). - max_P should be small (not 1!). + max_P should be small (not 1), usually 0.01..0.02 is good value. + + max_P is chosen as a number, so that max_P/(th_max-th_min) + is negative power of two in order arithmetics to contain + only shifts. + + + Parameters, settable by user: + ----------------------------- + + limit - bytes (must be > qth_max + burst) + + Hard limit on queue length, should be chosen >qth_max + to allow packet bursts. This parameter does not + affect algorithm behaviour and can be chosen + arbitrarily high (well, less than ram size) + Really, this limit will never be achieved + if RED works correctly. + + qth_min - bytes (should be < qth_max/2) + qth_max - bytes (should be at least 2*qth_min and less limit) + Wlog - bits (<32) log(1/W). + Plog - bits (<32) + + Plog is related to max_P by formula: + + max_P = (qth_max-qth_min)/2^Plog; + + F.e. if qth_max=128K and qth_min=32K, then Plog=22 + corresponds to max_P=0.02 + + Scell_log + Stab + + Lookup table for log((1-W)^(t/t_ave). - NB. SF&VJ assumed that Pb[avg] is linear function. I think it - is wrong. I'd make: - P[th_min] = 0, P[th_max] = 1; - dP/davg[th_min] = 0, dP/davg[th_max] = infinity, or a large number. - - I choose max_P as a number between 0.01 and 0.1, so that - C1 = max_P/(th_max-th_min) is power of two: C1 = 2^(-C1log) - - Parameters, settable by user (with default values): - - qmaxbytes=256K - hard limit on queue length, should be chosen >qth_max - to allow packet bursts. 
This parameter does not - affect algorithm behaviour and can be chosen - arbitrarily high (well, less than ram size) - Really, this limit will never be achieved - if RED works correctly. - qth_min=32K - qth_max=128K - qth_max should be at least 2*qth_min - Wlog=8 - log(1/W). - Alog=Wlog - fixed point position in th_min and th_max. - Rlog=10 - C1log=24 - C1log = trueC1log+Alog-Rlog - so that trueC1log=22 and max_P~0.02 - NOTES: @@ -97,10 +109,10 @@ If you want to allow bursts of L packets of size S, you should choose W: - L + 1 -th_min/S < (1-(1-W)^L)/W - - For th_min/S = 32 + L + 1 - th_min/S < (1-(1-W)^L)/W + th_min/S = 32 th_min/S = 4 + log(W) L -1 33 -2 35 @@ -117,33 +129,24 @@ struct red_sched_data { /* Parameters */ - unsigned long qmaxbytes; /* HARD maximal queue length */ - unsigned long qth_min; /* Min average length threshold: A scaled */ - unsigned long qth_max; /* Max average length threshold: A scaled */ - char Alog; /* Point position in average lengths */ + u32 limit; /* HARD maximal queue length */ + u32 qth_min; /* Min average length threshold: A scaled */ + u32 qth_max; /* Max average length threshold: A scaled */ + u32 Rmask; + u32 Scell_max; char Wlog; /* log(W) */ - char Rlog; /* random number bits */ - char C1log; /* log(1/C1) */ - char Slog; - char Stab[256]; + char Plog; /* random number bits */ + char Scell_log; + u8 Stab[256]; /* Variables */ - unsigned long qbytes; /* Queue length in bytes */ unsigned long qave; /* Average queue length: A scaled */ int qcount; /* Packets since last random number generation */ - unsigned qR; /* Cached random number [0..1qidlestart); PSCHED_GET_TIME(now); - us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, (256<Slog)-1, 0); - -/* It is wrong, but I do not think that SF+VJ proposal is reasonable - and did not invented anything more clever 8) + us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max, 0); +/* The problem: ideally, average length queue recalcultion should be done over constant clock intervals. 
It is too expensive, so that calculation is driven by outgoing packets. When queue is idle we have to model this clock by hands. - SF+VJ proposed to "generate" m = (idletime/bandwidth)*average_pkt_size + SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth) dummy packets as burst after idle time, i.e. q->qave *= (1-W)^m @@ -175,129 +176,193 @@ I believe, that a simpler model may be used here, but it is field for experiments. */ - q->qave >>= q->Stab[(us_idle>>q->Slog)&0xFF]; + q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF]; } - q->qave += ((q->qbytes<Alog) - q->qave) >> q->Wlog; + q->qave += sch->stats.backlog - (q->qave >> q->Wlog); if (q->qave < q->qth_min) { enqueue: q->qcount = -1; - if (q->qbytes <= q->qmaxbytes) { - skb_queue_tail(&sch->q, skb); - q->qbytes += skb->len; + if (sch->stats.backlog <= q->limit) { + __skb_queue_tail(&sch->q, skb); + sch->stats.backlog += skb->len; + sch->stats.bytes += skb->len; + sch->stats.packets++; return 1; } drop: kfree_skb(skb); + sch->stats.drops++; return 0; } if (q->qave >= q->qth_max) { q->qcount = -1; + sch->stats.overlimits++; goto drop; } - q->qcount++; - if (q->qcount++) { - if ((((q->qave - q->qth_min)*q->qcount)>>q->C1log) < q->qR) + if (++q->qcount) { + if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR) goto enqueue; q->qcount = 0; - q->qR = red_random(q->Rlog); + q->qR = net_random()&q->Rmask; + sch->stats.overlimits++; goto drop; } - q->qR = red_random(q->Rlog); + q->qR = net_random()&q->Rmask; goto enqueue; } +static int +red_requeue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct red_sched_data *q = (struct red_sched_data *)sch->data; + + PSCHED_SET_PASTPERFECT(q->qidlestart); + + __skb_queue_head(&sch->q, skb); + sch->stats.backlog += skb->len; + return 1; +} + static struct sk_buff * red_dequeue(struct Qdisc* sch) { struct sk_buff *skb; struct red_sched_data *q = (struct red_sched_data *)sch->data; - skb = skb_dequeue(&sch->q); + skb = __skb_dequeue(&sch->q); if (skb) { - 
q->qbytes -= skb->len; + sch->stats.backlog -= skb->len; return skb; } PSCHED_GET_TIME(q->qidlestart); return NULL; } -static void -red_reset(struct Qdisc* sch) +static int +red_drop(struct Qdisc* sch) { - struct red_sched_data *q = (struct red_sched_data *)sch->data; struct sk_buff *skb; + struct red_sched_data *q = (struct red_sched_data *)sch->data; - while((skb=skb_dequeue(&sch->q))!=NULL) { - q->qbytes -= skb->len; + skb = __skb_dequeue_tail(&sch->q); + if (skb) { + sch->stats.backlog -= skb->len; + sch->stats.drops++; kfree_skb(skb); + return 1; } - if (q->qbytes) { - printk("red_reset: qbytes=%lu\n", q->qbytes); - q->qbytes = 0; - } + PSCHED_GET_TIME(q->qidlestart); + return 0; +} + +static void red_reset(struct Qdisc* sch) +{ + struct red_sched_data *q = (struct red_sched_data *)sch->data; + struct sk_buff *skb; + + while((skb=__skb_dequeue(&sch->q))!=NULL) + kfree_skb(skb); + sch->stats.backlog = 0; PSCHED_SET_PASTPERFECT(q->qidlestart); q->qave = 0; q->qcount = -1; } -static int red_init(struct Qdisc *sch, struct pschedctl *pctl) +static int red_init(struct Qdisc *sch, struct rtattr *opt) { - struct red_sched_data *q; - struct redctl *ctl = (struct redctl*)pctl->args; - - q = (struct red_sched_data *)sch->data; + struct red_sched_data *q = (struct red_sched_data *)sch->data; + struct rtattr *tb[TCA_RED_STAB]; + struct tc_red_qopt *ctl; - if (pctl->arglen < sizeof(struct redctl)) + if (opt == NULL || + rtattr_parse(tb, TCA_RED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) || + tb[TCA_RED_PARMS-1] == 0 || tb[TCA_RED_STAB-1] == 0 || + RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) || + RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < 256) return -EINVAL; + ctl = RTA_DATA(tb[TCA_RED_PARMS-1]); + q->Wlog = ctl->Wlog; - q->Alog = ctl->Alog; - q->Rlog = ctl->Rlog; - q->C1log = ctl->C1log; - q->Slog = ctl->Slog; - q->qth_min = ctl->qth_min; - q->qth_max = ctl->qth_max; - q->qmaxbytes = ctl->qmaxbytes; - memcpy(q->Stab, ctl->Stab, 256); + q->Plog = ctl->Plog; + q->Rmask = ctl->Plog < 
32 ? ((1<Plog) - 1) : ~0UL; + q->Scell_log = ctl->Scell_log; + q->Scell_max = (256<Scell_log)-1; + q->qth_min = ctl->qth_min<Wlog; + q->qth_max = ctl->qth_max<Wlog; + q->limit = ctl->limit; + memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256); q->qcount = -1; PSCHED_SET_PASTPERFECT(q->qidlestart); + MOD_INC_USE_COUNT; return 0; } -struct Qdisc_ops red_ops = +#ifdef CONFIG_RTNETLINK +static int red_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct red_sched_data *q = (struct red_sched_data *)sch->data; + unsigned char *b = skb->tail; + struct rtattr *rta; + struct tc_red_qopt opt; + + rta = (struct rtattr*)b; + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + opt.limit = q->limit; + opt.qth_min = q->qth_min>>q->Wlog; + opt.qth_max = q->qth_max>>q->Wlog; + opt.Wlog = q->Wlog; + opt.Plog = q->Plog; + opt.Scell_log = q->Scell_log; + RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt); + rta->rta_len = skb->tail - b; + + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} +#endif + +static void red_destroy(struct Qdisc *sch) +{ + MOD_DEC_USE_COUNT; +} + +struct Qdisc_ops red_qdisc_ops = { NULL, + NULL, "red", - 0, sizeof(struct red_sched_data), + red_enqueue, red_dequeue, - red_reset, - NULL, + red_requeue, + red_drop, + red_init, - NULL + red_reset, + red_destroy, + +#ifdef CONFIG_RTNETLINK + red_dump, +#endif }; #ifdef MODULE -#include int init_module(void) { - int err; - - /* Load once and never free it. */ - MOD_INC_USE_COUNT; - - err = register_qdisc(&red_ops); - if (err) - MOD_DEC_USE_COUNT; - return err; + return register_qdisc(&red_qdisc_ops); } void cleanup_module(void) { + unregister_qdisc(&red_qdisc_ops); } #endif diff -u --recursive --new-file v2.1.98/linux/net/sched/sch_sfq.c linux/net/sched/sch_sfq.c --- v2.1.98/linux/net/sched/sch_sfq.c Mon Feb 23 18:12:14 1998 +++ linux/net/sched/sch_sfq.c Tue Apr 28 11:10:11 1998 @@ -1,5 +1,5 @@ /* - * net/sched/sch_sfq.c Stochastic Fairness Queueing scheduler. 
+ * net/sched/sch_sfq.c Stochastic Fairness Queueing discipline. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -9,6 +9,8 @@ * Authors: Alexey Kuznetsov, */ +#include +#include #include #include #include @@ -30,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -84,14 +87,12 @@ scattered over different locations. It is not good, but it allowed to put it into 4K. - It is easy to increase these values. + It is easy to increase these values, but not in flight. */ #define SFQ_DEPTH 128 #define SFQ_HASH_DIVISOR 1024 -#define SFQ_HASH(a) 0 - /* This type should contain at least SFQ_DEPTH*2 values */ typedef unsigned char sfq_index; @@ -104,9 +105,12 @@ struct sfq_sched_data { /* Parameters */ + int perturb_period; unsigned quantum; /* Allotment per round: MUST BE >= MTU */ /* Variables */ + struct timer_list perturb_timer; + int perturbation; sfq_index tail; /* Index of current slot in round */ sfq_index max_depth; /* Maximal depth */ @@ -118,10 +122,59 @@ struct sfq_head dep[SFQ_DEPTH*2]; /* Linked list of slots, indexed by depth */ }; +static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) +{ + int pert = q->perturbation; + + /* Have we any rotation primitives? If not, WHY? 
*/ + h ^= (h1<>(0x1F - pert)); + h ^= h>>10; + return h & 0x3FF; +} + +#ifndef IPPROTO_ESP +#define IPPROTO_ESP 50 +#endif + +static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) +{ + u32 h, h2; + + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + { + struct iphdr *iph = skb->nh.iph; + h = iph->daddr; + h2 = iph->saddr^iph->protocol; + if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && + (iph->protocol == IPPROTO_TCP || + iph->protocol == IPPROTO_UDP || + iph->protocol == IPPROTO_ESP)) + h2 ^= *(((u32*)iph) + iph->ihl); + break; + } + case __constant_htons(ETH_P_IPV6): + { + struct ipv6hdr *iph = skb->nh.ipv6h; + h = iph->daddr.s6_addr32[3]; + h2 = iph->saddr.s6_addr32[3]^iph->nexthdr; + if (iph->nexthdr == IPPROTO_TCP || + iph->nexthdr == IPPROTO_UDP || + iph->nexthdr == IPPROTO_ESP) + h2 ^= *(u32*)&iph[1]; + break; + } + default: + h = (u32)(unsigned long)skb->dst^skb->protocol; + h2 = (u32)(unsigned long)skb->sk; + } + return sfq_fold_hash(q, h, h2); +} + extern __inline__ void sfq_link(struct sfq_sched_data *q, sfq_index x) { sfq_index p, n; - int d = q->qs[x].qlen; + int d = q->qs[x].qlen + SFQ_DEPTH; p = d; n = q->dep[d].next; @@ -161,47 +214,49 @@ sfq_link(q, x); } -static __inline__ void sfq_drop(struct sfq_sched_data *q) +static int sfq_drop(struct Qdisc *sch) { - struct sk_buff *skb; + struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data; sfq_index d = q->max_depth; + struct sk_buff *skb; /* Queue is full! Find the longest slot and drop a packet from it */ - if (d != 1) { - sfq_index x = q->dep[d].next; + if (d > 1) { + sfq_index x = q->dep[d+SFQ_DEPTH].next; skb = q->qs[x].prev; __skb_unlink(skb, &q->qs[x]); kfree_skb(skb); sfq_dec(q, x); -/* sch->q.qlen--; - */ - return; + sch->stats.drops++; + return 1; } - /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ + if (d == 1) { + /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. 
*/ + d = q->next[q->tail]; + q->next[q->tail] = q->next[d]; + q->allot[q->next[d]] += q->quantum; + skb = q->qs[d].prev; + __skb_unlink(skb, &q->qs[d]); + kfree_skb(skb); + sfq_dec(q, d); + sch->q.qlen--; + q->ht[q->hash[d]] = SFQ_DEPTH; + sch->stats.drops++; + return 1; + } - d = q->next[q->tail]; - q->next[q->tail] = q->next[d]; - q->allot[q->next[d]] += q->quantum; - skb = q->qs[d].prev; - __skb_unlink(skb, &q->qs[d]); - kfree_skb(skb); - sfq_dec(q, d); -/* - sch->q.qlen--; - */ - q->ht[q->hash[d]] = SFQ_DEPTH; - return; + return 0; } static int sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) { struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data; - unsigned hash = SFQ_HASH(skb); + unsigned hash = sfq_hash(q, skb); sfq_index x; x = q->ht[hash]; @@ -222,13 +277,52 @@ q->tail = x; } } + if (++sch->q.qlen < SFQ_DEPTH-1) { + sch->stats.bytes += skb->len; + sch->stats.packets++; + return 1; + } + + sfq_drop(sch); + return 0; +} + +static int +sfq_requeue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data; + unsigned hash = sfq_hash(q, skb); + sfq_index x; + + x = q->ht[hash]; + if (x == SFQ_DEPTH) { + q->ht[hash] = x = q->dep[SFQ_DEPTH].next; + q->hash[x] = hash; + } + __skb_queue_head(&q->qs[x], skb); + sfq_inc(q, x); + if (q->qs[x].qlen == 1) { /* The flow is new */ + if (q->tail == SFQ_DEPTH) { /* It is the first flow */ + q->tail = x; + q->next[x] = x; + q->allot[x] = q->quantum; + } else { + q->next[x] = q->next[q->tail]; + q->next[q->tail] = x; + q->tail = x; + } + } if (++sch->q.qlen < SFQ_DEPTH-1) return 1; - sfq_drop(q); + sch->stats.drops++; + sfq_drop(sch); return 0; } + + + static struct sk_buff * sfq_dequeue(struct Qdisc* sch) { @@ -273,13 +367,28 @@ kfree_skb(skb); } +static void sfq_perturbation(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc*)arg; + struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data; + + q->perturbation = net_random()&0x1F; + 
q->perturb_timer.expires = jiffies + q->perturb_period; + + if (q->perturb_period) { + q->perturb_timer.expires = jiffies + q->perturb_period; + add_timer(&q->perturb_timer); + } +} -static int sfq_open(struct Qdisc *sch, void *arg) +static int sfq_init(struct Qdisc *sch, struct rtattr *opt) { - struct sfq_sched_data *q; + struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data; int i; - q = (struct sfq_sched_data *)sch->data; + q->perturb_timer.data = (unsigned long)sch; + q->perturb_timer.function = sfq_perturbation; + init_timer(&q->perturb_timer); for (i=0; iht[i] = SFQ_DEPTH; @@ -290,43 +399,89 @@ } q->max_depth = 0; q->tail = SFQ_DEPTH; - q->quantum = sch->dev->mtu; - if (sch->dev->hard_header) - q->quantum += sch->dev->hard_header_len; + if (opt == NULL) { + q->quantum = sch->dev->mtu; + q->perturb_period = 0; + if (sch->dev->hard_header) + q->quantum += sch->dev->hard_header_len; + } else { + struct tc_sfq_qopt *ctl = RTA_DATA(opt); + if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) + return -EINVAL; + q->quantum = ctl->quantum ? 
: psched_mtu(sch->dev); + q->perturb_period = ctl->perturb_period*HZ; + /* The rest is compiled in */ + } for (i=0; iperturb_period) { + q->perturb_timer.expires = jiffies + q->perturb_period; + add_timer(&q->perturb_timer); + } + MOD_INC_USE_COUNT; return 0; } +static void sfq_destroy(struct Qdisc *sch) +{ + struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data; + del_timer(&q->perturb_timer); + MOD_DEC_USE_COUNT; +} -struct Qdisc_ops sfq_ops = +#ifdef CONFIG_RTNETLINK +static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) { + struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data; + unsigned char *b = skb->tail; + struct tc_sfq_qopt opt; + + opt.quantum = q->quantum; + opt.perturb_period = q->perturb_period/HZ; + + opt.limit = SFQ_DEPTH; + opt.divisor = SFQ_HASH_DIVISOR; + opt.flows = SFQ_DEPTH; + + RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} +#endif + +struct Qdisc_ops sfq_qdisc_ops = +{ + NULL, NULL, "sfq", - 0, sizeof(struct sfq_sched_data), + sfq_enqueue, sfq_dequeue, + sfq_requeue, + sfq_drop, + + sfq_init, sfq_reset, - NULL, - sfq_open, + sfq_destroy, + +#ifdef CONFIG_RTNETLINK + sfq_dump, +#endif }; #ifdef MODULE int init_module(void) { - int err; - - /* Load once and never free it. */ - MOD_INC_USE_COUNT; - - err = register_qdisc(&sfq_ops); - if (err) - MOD_DEC_USE_COUNT; - return err; + return register_qdisc(&sfq_qdisc_ops); } void cleanup_module(void) { + unregister_qdisc(&sfq_qdisc_ops); } #endif diff -u --recursive --new-file v2.1.98/linux/net/sched/sch_tbf.c linux/net/sched/sch_tbf.c --- v2.1.98/linux/net/sched/sch_tbf.c Thu Feb 12 20:56:15 1998 +++ linux/net/sched/sch_tbf.c Tue Apr 28 11:10:11 1998 @@ -1,5 +1,5 @@ /* - * net/sched/sch_tbf.c Token Bucket Filter. + * net/sched/sch_tbf.c Token Bucket Filter queue. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -10,6 +10,8 @@ * */ +#include +#include #include #include #include @@ -39,69 +41,91 @@ ======================================= SOURCE. + ------- None. - ALGORITHM. + Description. + ------------ + + Data flow obeys TBF with rate R and depth B, if for any + time interval t_i...t_f number of transmitted bits + does not exceed B + R*(t_f-t_i). + + Packetized version of this definition: + sequence of packets of sizes s_i served at moments t_i + obeys TBF, if for any i<=k: + + s_i+....+s_k <= B + R*(t_k - t_i) + + Algorithm. + ---------- + + Let N(t_i) be B/R initially and N(t) grows continuously with time as: + + N(t+delta) = min{B/R, N(t) + delta} + + If the first packet in queue has length S, it may be + transmited only at the time t_* when S/R <= N(t_*), + and in this case N(t) jumps: + + N(t_* + 0) = N(t_* - 0) - S/R. + + - Sequence of packets satisfy token bucket filter with - rate $r$ and depth $b$, if all the numbers defined by: - \begin{eqnarray*} - n_0 &=& b, \\ - n_i &=& {\rm max} ( b, n_{i-1} + r*(t_i-t_{i-1}) - L_i ), - \end{eqnarray*} - where $t_i$ --- departure time of $i$-th packet and - $L_i$ -- its length, never less than zero. - - It is convenient to rescale $n_i$ by factor $r$, so - that the sequence has "canonical" form: - \[ - n_0 = b/r, - n_i = max { b/r, n_{i-1} + t_i - t_{i-1} - L_i/r }, - \] + Actually, QoS requires two TBF to be applied to data stream. + One of them controls steady state burst size, another + with rate P (peak rate) and depth M (equal to link MTU) + limits bursts at smaller time scale. + + Apparently, P>R, and B>M. If P is infinity, this double + TBF is equivalent to single one. + + When TBF works in reshaping mode, latency is estimated as: + + lat = max ((L-B)/R, (L-M)/P) - If a packet has n_i < 0, we throttle filter - by $-n_i$ usecs. NOTES. 
+ ------ If TBF throttles, it starts watchdog timer, which will wake up it - after 0...10 msec. + when it will be ready to transmit. + Note, that minimal timer resolution is 1/HZ. If no new packets will arrive during this period, or device will not be awaken by EOI for previous packet, - tbf could stop its activity for 10 msec. + tbf could stop its activity for 1/HZ. + + + It means, that with depth B, the maximal rate is + + R_crit = B*HZ - It means that tbf will sometimes introduce pathological - 10msec delays to flow corresponding to rate*10msec bytes. - For 10Mbit/sec flow it is about 12Kb, on 100Mbit/sec -- ~100Kb. - This number puts lower reasonbale bound on token bucket depth, - but even if depth is larger traffic is erratic at large rates. - - This problem is not specific for THIS implementation. Really, - there exists statement that any attempt to shape traffic - in transit will increase delays and jitter much more than - we expected naively. + F.e. for 10Mbit ethernet and HZ=100 minimal allowed B is ~10Kbytes. - Particularily, it means that delay/jitter sensitive traffic - MUST NOT be shaped. Cf. CBQ (wrong) and CSZ (correct) approaches. + Note, that peak rate TBF is much more tough: with MTU 1500 + P_crit = 150Kbytes/sec. 
So that, if you need greater peak + rates, use alpha with HZ=1000 :-) */ struct tbf_sched_data { /* Parameters */ - int cell_log; /* 1<= MTU/B */ - unsigned long max_bytes; /* Maximal length of backlog: bytes */ + u32 limit; /* Maximal length of backlog: bytes */ + u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ + u32 mtu; + struct qdisc_rate_table *R_tab; + struct qdisc_rate_table *P_tab; /* Variables */ - unsigned long bytes; /* Current length of backlog */ - unsigned long tokens; /* Current number of tokens */ + long tokens; /* Current number of B tokens */ + long ptokens; /* Current number of P tokens */ psched_time_t t_c; /* Time check-point */ struct timer_list wd_timer; /* Watchdog timer */ }; -#define L2T(q,L) ((q)->L_tab[(L)>>(q)->cell_log]) +#define L2T(q,L) ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log]) +#define L2T_P(q,L) ((q)->P_tab->data[(L)>>(q)->P_tab->rate.cell_log]) static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) @@ -109,30 +133,56 @@ struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; __skb_queue_tail(&sch->q, skb); - if ((q->bytes += skb->len) <= q->max_bytes) + if ((sch->stats.backlog += skb->len) <= q->limit) { + sch->stats.bytes += skb->len; + sch->stats.packets++; return 1; + } /* Drop action: undo the things that we just made, * i.e. 
make tail drop */ __skb_unlink(skb, &sch->q); - q->bytes -= skb->len; - kfree_skb(skb); + sch->stats.backlog -= skb->len; + sch->stats.drops++; +#ifdef CONFIG_NET_CLS_POLICE + if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch)) +#endif + kfree_skb(skb); + return 0; +} + +static int +tbf_requeue(struct sk_buff *skb, struct Qdisc* sch) +{ + __skb_queue_head(&sch->q, skb); + sch->stats.backlog += skb->len; + return 1; +} + +static int +tbf_drop(struct Qdisc* sch) +{ + struct sk_buff *skb; + + skb = __skb_dequeue_tail(&sch->q); + if (skb) { + sch->stats.backlog -= skb->len; + sch->stats.drops++; + kfree_skb(skb); + return 1; + } return 0; } static void tbf_watchdog(unsigned long arg) { struct Qdisc *sch = (struct Qdisc*)arg; - struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; - - q->wd_timer.function = NULL; qdisc_wakeup(sch->dev); } - static struct sk_buff * tbf_dequeue(struct Qdisc* sch) { @@ -144,19 +194,42 @@ if (skb) { psched_time_t now; long toks; + long ptoks = 0; PSCHED_GET_TIME(now); - toks = PSCHED_TDIFF_SAFE(now, q->t_c, q->depth, 0) - + q->tokens - L2T(q,skb->len); + toks = PSCHED_TDIFF_SAFE(now, q->t_c, q->buffer, 0); + + if (q->P_tab) { + ptoks = toks + q->ptokens; + if (ptoks > (long)q->mtu) + ptoks = q->mtu; + ptoks -= L2T_P(q, skb->len); + } + toks += q->tokens; + if (toks > (long)q->buffer) + toks = q->buffer; + toks -= L2T(q, skb->len); - if (toks >= 0) { + if ((toks|ptoks) >= 0) { q->t_c = now; - q->tokens = toks <= q->depth ? toks : q->depth; - q->bytes -= skb->len; + q->tokens = toks; + q->ptokens = ptoks; + sch->stats.backlog -= skb->len; return skb; } + if (!sch->dev->tbusy) { + long delay = PSCHED_US2JIFFIE(max(-toks, -ptoks)); + + if (delay == 0) + delay = 1; + + del_timer(&q->wd_timer); + q->wd_timer.expires = jiffies + delay; + add_timer(&q->wd_timer); + } + /* Maybe, we have in queue a shorter packet, which can be sent now. It sounds cool, but, however, wrong in principle. 
@@ -164,17 +237,12 @@ Really, if we splitted flow to independent subflows, it would be very good solution. - Look at sch_csz.c. + It is main idea of all FQ algorithms + (cf. CSZ, HPFQ, HFCS) */ __skb_queue_head(&sch->q, skb); - if (!sch->dev->tbusy) { - if (q->wd_timer.function) - del_timer(&q->wd_timer); - q->wd_timer.function = tbf_watchdog; - q->wd_timer.expires = jiffies + PSCHED_US2JIFFIE(-toks); - add_timer(&q->wd_timer); - } + sch->stats.overlimits++; } return NULL; } @@ -184,69 +252,135 @@ tbf_reset(struct Qdisc* sch) { struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; - struct sk_buff *skb; - while ((skb = __skb_dequeue(&sch->q)) != NULL) - kfree_skb(skb); - q->bytes = 0; + skb_queue_purge(&sch->q); + sch->stats.backlog = 0; PSCHED_GET_TIME(q->t_c); - q->tokens = q->depth; - if (q->wd_timer.function) { - del_timer(&q->wd_timer); - q->wd_timer.function = NULL; - } + q->tokens = q->buffer; + q->ptokens = q->mtu; + del_timer(&q->wd_timer); } -static int tbf_init(struct Qdisc* sch, void *arg) +static int tbf_init(struct Qdisc* sch, struct rtattr *opt) { struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; - struct tbfctl *ctl = (struct tbfctl*)arg; + struct rtattr *tb[TCA_TBF_PTAB]; + struct tc_tbf_qopt *qopt; + + MOD_INC_USE_COUNT; + + if (opt == NULL || + rtattr_parse(tb, TCA_TBF_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) || + tb[TCA_TBF_PARMS-1] == NULL || + RTA_PAYLOAD(tb[TCA_TBF_PARMS-1]) < sizeof(*qopt)) { + MOD_DEC_USE_COUNT; + return -EINVAL; + } + + qopt = RTA_DATA(tb[TCA_TBF_PARMS-1]); + q->R_tab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB-1]); + if (q->R_tab == NULL) { + MOD_DEC_USE_COUNT; + return -EINVAL; + } + + if (qopt->peakrate.rate) { + q->P_tab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_PTAB-1]); + if (q->P_tab == NULL) { + MOD_DEC_USE_COUNT; + qdisc_put_rtab(q->R_tab); + return -EINVAL; + } + } PSCHED_GET_TIME(q->t_c); init_timer(&q->wd_timer); - q->wd_timer.function = NULL; + q->wd_timer.function = tbf_watchdog; 
q->wd_timer.data = (unsigned long)sch; - if (ctl) { - q->max_bytes = ctl->bytes; - q->depth = ctl->depth; - q->tokens = q->tokens; - q->cell_log = ctl->cell_log; - memcpy(q->L_tab, ctl->L_tab, 256*sizeof(unsigned long)); - } + q->limit = qopt->limit; + q->mtu = qopt->mtu; + if (q->mtu == 0) + q->mtu = psched_mtu(sch->dev); + q->buffer = qopt->buffer; + q->tokens = q->buffer; + q->ptokens = q->mtu; return 0; } -struct Qdisc_ops tbf_ops = +static void tbf_destroy(struct Qdisc *sch) { + struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; + + del_timer(&q->wd_timer); + + if (q->P_tab) + qdisc_put_rtab(q->P_tab); + if (q->R_tab) + qdisc_put_rtab(q->R_tab); + + MOD_DEC_USE_COUNT; +} + +#ifdef CONFIG_RTNETLINK +static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; + unsigned char *b = skb->tail; + struct rtattr *rta; + struct tc_tbf_qopt opt; + + rta = (struct rtattr*)b; + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + + opt.limit = q->limit; + opt.rate = q->R_tab->rate; + if (q->P_tab) + opt.peakrate = q->P_tab->rate; + else + memset(&opt.peakrate, 0, sizeof(opt.peakrate)); + opt.mtu = q->mtu; + opt.buffer = q->buffer; + RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt); + rta->rta_len = skb->tail - b; + + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} +#endif + +struct Qdisc_ops tbf_qdisc_ops = +{ + NULL, NULL, "tbf", - 0, sizeof(struct tbf_sched_data), + tbf_enqueue, tbf_dequeue, - tbf_reset, - NULL, + tbf_requeue, + tbf_drop, + tbf_init, - NULL, + tbf_reset, + tbf_destroy, + +#ifdef CONFIG_RTNETLINK + tbf_dump, +#endif }; #ifdef MODULE -#include int init_module(void) { - int err; - - /* Load once and never free it. 
*/ - MOD_INC_USE_COUNT; - - err = register_qdisc(&tbf_ops); - if (err) - MOD_DEC_USE_COUNT; - return err; + return register_qdisc(&tbf_qdisc_ops); } void cleanup_module(void) { + unregister_qdisc(&tbf_qdisc_ops); } #endif diff -u --recursive --new-file v2.1.98/linux/net/sched/sch_teql.c linux/net/sched/sch_teql.c --- v2.1.98/linux/net/sched/sch_teql.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/sch_teql.c Wed Apr 29 22:46:59 1998 @@ -0,0 +1,474 @@ +/* net/sched/sch_teql.c "True" (or "trivial") link equalizer. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + How to setup it. + ---------------- + + After loading this module you will find new device teqlN + and new qdisc with the same name. To join a slave to equalizer + you should just set this qdisc on a device f.e. + + # tc qdisc add dev eth0 root teql0 + # tc qdisc add dev eth1 root teql0 + + That's all. Full PnP 8) + + Applicability. + -------------- + + 1. Slave devices MUST be active devices i.e. must raise tbusy + signal and generate EOI event. If you want to equalize virtual devices + sort of tunnels, use normal eql device. + 2. This device puts no limitations on physical slave characteristics + f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-) + Certainly, large difference in link speeds will make resulting eqalized + link unusable, because of huge packet reordering. I estimated upper + useful difference as ~10 times. + 3. 
If slave requires address resolution, only protocols using + neighbour cache (IPv4/IPv6) will work over equalized link. + Another protocols still are allowed to use slave device directly, + which will not break load balancing, though native slave + traffic will have the highest priority. + */ + +struct teql_master +{ + struct Qdisc_ops qops; + struct device dev; + struct Qdisc *slaves; + struct net_device_stats stats; + char name[IFNAMSIZ]; +}; + +struct teql_sched_data +{ + struct Qdisc *next; + struct teql_master *m; + struct neighbour *ncache; + struct sk_buff_head q; +}; + +#define NEXT_SLAVE(q) (((struct teql_sched_data*)((q)->data))->next) + +#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT|IFF_BROADCAST) + +/* "teql*" qdisc routines */ + +static int +teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct device *dev = sch->dev; + struct teql_sched_data *q = (struct teql_sched_data *)sch->data; + + __skb_queue_tail(&q->q, skb); + if (q->q.qlen <= dev->tx_queue_len) { + sch->stats.bytes += skb->len; + sch->stats.packets++; + return 1; + } + + __skb_unlink(skb, &q->q); + kfree_skb(skb); + sch->stats.drops++; + return 0; +} + +static int +teql_requeue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct teql_sched_data *q = (struct teql_sched_data *)sch->data; + + __skb_queue_head(&q->q, skb); + return 1; +} + +static struct sk_buff * +teql_dequeue(struct Qdisc* sch) +{ + struct teql_sched_data *dat = (struct teql_sched_data *)sch->data; + struct sk_buff *skb; + + skb = __skb_dequeue(&dat->q); + if (skb == NULL) { + struct device *m = dat->m->dev.qdisc->dev; + if (m) { + m->tbusy = 0; + dat->m->slaves = sch; + qdisc_restart(m); + } + } + sch->q.qlen = dat->q.qlen + dat->m->dev.qdisc->q.qlen; + return skb; +} + +static __inline__ void +teql_neigh_release(struct neighbour *n) +{ + if (n) + neigh_release(n); +} + +static void +teql_reset(struct Qdisc* sch) +{ + struct teql_sched_data *dat = (struct teql_sched_data *)sch->data; + + skb_queue_purge(&dat->q); + 
sch->q.qlen = 0; + teql_neigh_release(xchg(&dat->ncache, NULL)); +} + +static void +teql_destroy(struct Qdisc* sch) +{ + struct Qdisc *q, *prev; + struct teql_sched_data *dat = (struct teql_sched_data *)sch->data; + struct teql_master *master = dat->m; + + if ((prev = master->slaves) != NULL) { + do { + q = NEXT_SLAVE(prev); + if (q == sch) { + NEXT_SLAVE(prev) = NEXT_SLAVE(q); + if (q == master->slaves) { + master->slaves = NEXT_SLAVE(q); + if (q == master->slaves) + master->slaves = NULL; + } + skb_queue_purge(&dat->q); + teql_neigh_release(xchg(&dat->ncache, NULL)); + break; + } + + } while ((prev = q) != master->slaves); + } + + MOD_DEC_USE_COUNT; +} + +static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt) +{ + struct device *dev = sch->dev; + struct teql_master *m = (struct teql_master*)sch->ops; + struct teql_sched_data *q = (struct teql_sched_data *)sch->data; + + if (dev->hard_header_len > m->dev.hard_header_len) + return -EINVAL; + + q->m = m; + + skb_queue_head_init(&q->q); + + if (m->slaves) { + if (m->dev.flags & IFF_UP) { + if ((m->dev.flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT)) + || (m->dev.flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST)) + || (m->dev.flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST)) + || dev->mtu < m->dev.mtu) + return -EINVAL; + } else { + if (!(dev->flags&IFF_POINTOPOINT)) + m->dev.flags &= ~IFF_POINTOPOINT; + if (!(dev->flags&IFF_BROADCAST)) + m->dev.flags &= ~IFF_BROADCAST; + if (!(dev->flags&IFF_MULTICAST)) + m->dev.flags &= ~IFF_MULTICAST; + if (dev->mtu < m->dev.mtu) + m->dev.mtu = dev->mtu; + } + q->next = NEXT_SLAVE(m->slaves); + NEXT_SLAVE(m->slaves) = sch; + } else { + q->next = sch; + m->slaves = sch; + m->dev.mtu = dev->mtu; + m->dev.flags = (m->dev.flags&~FMASK)|(dev->flags&FMASK); + } + + MOD_INC_USE_COUNT; + return 0; +} + +/* "teql*" netdevice routines */ + +static int +__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct device *dev) +{ + struct teql_sched_data *q = 
(void*)dev->qdisc->data; + struct neighbour *mn = skb->dst->neighbour; + struct neighbour *n = q->ncache; + + if (mn->tbl == NULL) + return -EINVAL; + if (n && n->tbl == mn->tbl && + memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) { + atomic_inc(&n->refcnt); + } else { + n = __neigh_lookup(mn->tbl, mn->primary_key, dev, 1); + if (n == NULL) + return -ENOBUFS; + } + if (neigh_event_send(n, skb_res) == 0) { + if (dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len) < 0) { + neigh_release(n); + return -EINVAL; + } + teql_neigh_release(xchg(&q->ncache, n)); + return 0; + } + neigh_release(n); + return (skb_res != NULL); +} + +static __inline__ int +teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct device *dev) +{ + if (dev->hard_header == NULL || + skb->dst == NULL || + skb->dst->neighbour == NULL) + return 0; + return __teql_resolve(skb, skb_res, dev); +} + +static int teql_master_xmit(struct sk_buff *skb, struct device *dev) +{ + struct teql_master *master = (void*)dev->priv; + struct Qdisc *start, *q; + int busy; + int nores; + struct sk_buff *skb_res = NULL; + + dev->tbusy = 1; + + start = master->slaves; + +restart: + nores = 0; + busy = 1; + + if ((q = start) == NULL) + goto drop; + + do { + struct device *slave = q->dev; + + if (!slave->tbusy && slave->qdisc_sleeping == q) { + busy = 0; + + if (q->h.forw == NULL) { + q->h.forw = qdisc_head.forw; + qdisc_head.forw = &q->h; + } + + switch (teql_resolve(skb, skb_res, slave)) { + case 0: + if (slave->hard_start_xmit(skb, slave) == 0) { + master->slaves = NEXT_SLAVE(q); + dev->tbusy = 0; + return 0; + } + break; + case 1: + nores = 1; + break; + default: + master->slaves = NEXT_SLAVE(q); + dev->tbusy = 0; + return 0; + } + __skb_pull(skb, skb->nh.raw - skb->data); + } + } while ((q = NEXT_SLAVE(q)) != start); + + if (nores && skb_res == NULL) { + skb_res = skb; + goto restart; + } + + dev->tbusy = busy; + if (busy) + return 1; + +drop: + dev_kfree_skb(skb); + return 
0; +} + +static int teql_master_open(struct device *dev) +{ + struct Qdisc * q; + struct teql_master *m = (void*)dev->priv; + int mtu = 0xFFFE; + unsigned flags = IFF_NOARP|IFF_MULTICAST; + + if (m->slaves == NULL) + return -EUNATCH; + + flags = FMASK; + + q = m->slaves; + do { + struct device *slave = q->dev; + + if (slave == NULL) + return -EUNATCH; + + if (slave->mtu < mtu) + mtu = slave->mtu; + if (slave->hard_header_len > LL_MAX_HEADER) + return -EINVAL; + + /* If all the slaves are BROADCAST, master is BROADCAST + If all the slaves are PtP, master is PtP + Otherwise, master is NBMA. + */ + if (!(slave->flags&IFF_POINTOPOINT)) + flags &= ~IFF_POINTOPOINT; + if (!(slave->flags&IFF_BROADCAST)) + flags &= ~IFF_BROADCAST; + if (!(slave->flags&IFF_MULTICAST)) + flags &= ~IFF_MULTICAST; + } while ((q = NEXT_SLAVE(q)) != m->slaves); + + m->dev.mtu = mtu; + m->dev.flags = (m->dev.flags&~FMASK) | flags; + MOD_INC_USE_COUNT; + return 0; +} + +static int teql_master_close(struct device *dev) +{ + MOD_DEC_USE_COUNT; + return 0; +} + +static struct net_device_stats *teql_master_stats(struct device *dev) +{ + struct teql_master *m = (void*)dev->priv; + return &m->stats; +} + +static int teql_master_mtu(struct device *dev, int new_mtu) +{ + struct teql_master *m = (void*)dev->priv; + struct Qdisc *q; + + if (new_mtu < 68) + return -EINVAL; + + q = m->slaves; + if (q) { + do { + if (new_mtu > q->dev->mtu) + return -EINVAL; + } while ((q=NEXT_SLAVE(q)) != m->slaves); + } + + dev->mtu = new_mtu; + return 0; +} + +static int teql_master_init(struct device *dev) +{ + dev->open = teql_master_open; + dev->hard_start_xmit = teql_master_xmit; + dev->stop = teql_master_close; + dev->get_stats = teql_master_stats; + dev->change_mtu = teql_master_mtu; + dev->type = 0; + dev->mtu = 1500; + dev->tx_queue_len = 100; + dev->flags = IFF_NOARP; + dev->hard_header_len = LL_MAX_HEADER; + return 0; +} + +static struct teql_master the_master = { +{ + NULL, + NULL, + "", + sizeof(struct 
teql_sched_data), + + teql_enqueue, + teql_dequeue, + teql_requeue, + NULL, + + teql_qdisc_init, + teql_reset, + teql_destroy, +},}; + + +#ifdef MODULE +int init_module(void) +#else +__initfunc(int teql_init(void)) +#endif +{ + int err; + + rtnl_lock(); + + the_master.dev.priv = (void*)&the_master; + the_master.dev.name = (void*)&the_master.name; + err = dev_alloc_name(&the_master.dev, "teql%d"); + if (err < 0) + return err; + memcpy(the_master.qops.id, the_master.name, IFNAMSIZ); + the_master.dev.init = teql_master_init; + + err = register_netdevice(&the_master.dev); + if (err == 0) { + err = register_qdisc(&the_master.qops); + if (err) + unregister_netdevice(&the_master.dev); + } + rtnl_unlock(); + return err; +} + +#ifdef MODULE +void cleanup_module(void) +{ + rtnl_lock(); + unregister_qdisc(&the_master.qops); + unregister_netdevice(&the_master.dev); + rtnl_unlock(); +} +#endif diff -u --recursive --new-file v2.1.98/linux/net/unix/Makefile linux/net/unix/Makefile --- v2.1.98/linux/net/unix/Makefile Sun Nov 30 14:00:40 1997 +++ linux/net/unix/Makefile Tue Apr 28 11:10:11 1998 @@ -1,5 +1,5 @@ # -# Makefile for the Linux TCP/IP (INET) layer. +# Makefile for the Linux unix domain socket layer. # # Note! Dependencies are done automagically by 'make dep', which also # removes any old dependencies. DON'T put your own dependencies here diff -u --recursive --new-file v2.1.98/linux/net/wanrouter/Makefile linux/net/wanrouter/Makefile --- v2.1.98/linux/net/wanrouter/Makefile Sun Feb 2 05:18:52 1997 +++ linux/net/wanrouter/Makefile Tue Apr 28 11:10:11 1998 @@ -8,7 +8,8 @@ # Note 2! The CFLAGS definition is now in the main makefile... 
O_TARGET := wanrouter.o -O_OBJS := wanmain.o wanproc.o +OX_OBJS := wanmain.o +O_OBJS := wanproc.o M_OBJS := $(O_TARGET) include $(TOPDIR)/Rules.make diff -u --recursive --new-file v2.1.98/linux/net/wanrouter/wanmain.c linux/net/wanrouter/wanmain.c --- v2.1.98/linux/net/wanrouter/wanmain.c Mon Feb 23 18:12:15 1998 +++ linux/net/wanrouter/wanmain.c Tue Apr 28 11:10:11 1998 @@ -18,11 +18,12 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * ============================================================================ -* Oct 15, 1997 Farhan Thawar changed wan_encapsulate to add a pad byte of 0 -* Jun 27, 1997 Alan Cox realigned with vendor code +* Dec 27, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) * Jan 16, 1997 Gene Kozin router_devlist made public * Jan 31, 1997 Alan Cox Hacked it about a bit for 2.1 -* Dec 27, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) +* Jun 27, 1997 Alan Cox realigned with vendor code +* Oct 15, 1997 Farhan Thawar changed wan_encapsulate to add a pad byte of 0 +* Apr 20, 1998 Alan Cox Fixed 2.1 symbols *****************************************************************************/ #include /* offsetof(), etc. */ @@ -165,6 +166,7 @@ * Context: process */ + int register_wan_device(wan_device_t* wandev) { int err, namelen; @@ -223,6 +225,7 @@ * <0 error. * Context: process */ + int unregister_wan_device(char* name) { @@ -269,6 +272,7 @@ * 1. This function may be called on interrupt context. */ + int wanrouter_encapsulate (struct sk_buff* skb, struct device* dev) { int hdr_len = 0; @@ -310,6 +314,7 @@ * 1. This function may be called on interrupt context. */ + unsigned short wanrouter_type_trans (struct sk_buff* skb, struct device* dev) { int cnt = skb->data[0] ? 
0 : 1; /* there may be a pad present */ @@ -679,6 +684,14 @@ return 0; } +#ifdef MODULE +EXPORT_SYMBOL(register_wan_device); +EXPORT_SYMBOL(unregister_wan_device); +EXPORT_SYMBOL(wanrouter_encapsulate); +EXPORT_SYMBOL(wanrouter_type_trans); +#endif + /* * End */ +