diff -uNr linux_org/Documentation/Configure.help linux/Documentation/Configure.help
--- linux_org/Documentation/Configure.help	2006-10-27 14:08:20.000000000 +0200
+++ linux/Documentation/Configure.help	2006-10-27 14:11:52.000000000 +0200
@@ -2848,6 +2848,31 @@
   If you want to compile it as a module, say M here and read
   <file:Documentation/modules.txt>.  If unsure, say `Y'.
 
+PPTP conntrack and NAT support
+CONFIG_IP_NF_PPTP
+  This module adds support for PPTP (Point-to-Point Tunnelling Protocol,
+  RFC2637) connection tracking and NAT.
+
+  If you are running PPTP sessions over a stateful firewall or NAT box,
+  you may want to enable this feature.
+
+  Please note that not all PPTP modes of operation are supported yet.
+  For more info, read the top of the file net/ipv4/netfilter/ip_conntrack_pptp.c
+
+  If you want to compile it as a module, say M here and read
+  Documentation/modules.txt.  If unsure, say `N'.
+
+GRE protocol conntrack and NAT support
+CONFIG_IP_NF_CT_PROTO_GRE
+  This module adds generic support for connection tracking and NAT of the
+  GRE protocol (RFC1701, RFC2784).  Please note that this will only work
+  with GRE connections using the key field of the GRE header.
+
+  You will need GRE support to enable PPTP support.
+
+  If you want to compile it as a module, say `M' here and read
+  Documentation/modules.txt.  If unsure, say `N'.
+
 User space queueing via NETLINK
 CONFIG_IP_NF_QUEUE
   Netfilter has the ability to queue packets to user space: the
diff -uNr linux_org/include/linux/netfilter_ipv4/ip_conntrack.h linux/include/linux/netfilter_ipv4/ip_conntrack.h
--- linux_org/include/linux/netfilter_ipv4/ip_conntrack.h	2004-11-24 12:13:57.000000000 +0100
+++ linux/include/linux/netfilter_ipv4/ip_conntrack.h	2006-10-27 14:11:52.000000000 +0200
@@ -50,19 +50,23 @@
 
 #include <linux/netfilter_ipv4/ip_conntrack_tcp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_icmp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
 
 /* per conntrack: protocol private data */
 union ip_conntrack_proto {
 	/* insert conntrack proto private data here */
+	struct ip_ct_gre gre;
 	struct ip_ct_tcp tcp;
 	struct ip_ct_icmp icmp;
 };
 
 union ip_conntrack_expect_proto {
 	/* insert expect proto private data here */
+	struct ip_ct_gre_expect gre;
 };
 
 /* Add protocol helper include file here */
+#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
 #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
 
@@ -71,6 +75,7 @@
 /* per expectation: application helper private data */
 union ip_conntrack_expect_help {
 	/* insert conntrack helper private data (expect) here */
+	struct ip_ct_pptp_expect exp_pptp_info;
 	struct ip_ct_amanda_expect exp_amanda_info;
 	struct ip_ct_ftp_expect exp_ftp_info;
 	struct ip_ct_irc_expect exp_irc_info;
@@ -85,16 +90,19 @@
 /* per conntrack: application helper private data */
 union ip_conntrack_help {
 	/* insert conntrack helper private data (master) here */
+	struct ip_ct_pptp_master ct_pptp_info;
 	struct ip_ct_ftp_master ct_ftp_info;
 	struct ip_ct_irc_master ct_irc_info;
 };
 
 #ifdef CONFIG_IP_NF_NAT_NEEDED
 #include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_pptp.h>
 
 /* per conntrack: nat application helper private data */
 union ip_conntrack_nat_help {
 	/* insert nat helper private data here */
+	struct ip_nat_pptp nat_pptp_info;
 };
 #endif
 
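For orientation: every conntrack carries one instance of each union above, and the
active member is implied by the tuple's protocol number, so adding the GRE and PPTP
members costs at most the size of the largest member. A minimal userspace sketch of
that dispatch pattern (all types and names here are illustrative, not the kernel's):

#include <stdint.h>
#include <stdio.h>

/* hypothetical mirrors of the per-protocol private state */
struct ip_ct_gre_demo { unsigned int stream_timeout, timeout; };
struct ip_ct_tcp_demo { uint8_t state; };

struct conntrack_demo {
	uint8_t protonum;	/* IPPROTO_* value selects the live member */
	union {
		struct ip_ct_gre_demo gre;
		struct ip_ct_tcp_demo tcp;
	} proto;		/* members overlay the same storage */
};

int main(void)
{
	struct conntrack_demo ct = { .protonum = 47 /* IPPROTO_GRE */ };

	ct.proto.gre.timeout = 30;
	printf("union size %zu: max of members, selected via protonum\n",
	       sizeof(ct.proto));
	return 0;
}
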
diff -uNr linux_org/include/linux/netfilter_ipv4/ip_conntrack_pptp.h linux/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
--- linux_org/include/linux/netfilter_ipv4/ip_conntrack_pptp.h	1970-01-01 01:00:00.000000000 +0100
+++ linux/include/linux/netfilter_ipv4/ip_conntrack_pptp.h	2006-10-27 14:11:52.000000000 +0200
@@ -0,0 +1,313 @@
+/* PPTP constants and structs */
+#ifndef _CONNTRACK_PPTP_H
+#define _CONNTRACK_PPTP_H
+
+/* state of the control session */
+enum pptp_ctrlsess_state {
+	PPTP_SESSION_NONE,			/* no session present */
+	PPTP_SESSION_ERROR,			/* some session error */
+	PPTP_SESSION_STOPREQ,			/* stop_sess request seen */
+	PPTP_SESSION_REQUESTED,			/* start_sess request seen */
+	PPTP_SESSION_CONFIRMED,			/* session established */
+};
+
+/* state of the call inside the control session */
+enum pptp_ctrlcall_state {
+	PPTP_CALL_NONE,
+	PPTP_CALL_ERROR,
+	PPTP_CALL_OUT_REQ,
+	PPTP_CALL_OUT_CONF,
+	PPTP_CALL_IN_REQ,
+	PPTP_CALL_IN_REP,
+	PPTP_CALL_IN_CONF,
+	PPTP_CALL_CLEAR_REQ,
+};
+
+
+/* conntrack private data */
+struct ip_ct_pptp_master {
+	enum pptp_ctrlsess_state sstate;	/* session state */
+
+	/* everything below is going to be per-expectation in newnat,
+	 * since there could be more than one call within one session */
+	enum pptp_ctrlcall_state cstate;	/* call state */
+	u_int16_t pac_call_id;			/* call id of PAC, host byte order */
+	u_int16_t pns_call_id;			/* call id of PNS, host byte order */
+};
+
+/* conntrack_expect private member */
+struct ip_ct_pptp_expect {
+	enum pptp_ctrlcall_state cstate;	/* call state */
+	u_int16_t pac_call_id;			/* call id of PAC */
+	u_int16_t pns_call_id;			/* call id of PNS */
+};
+
+
+#ifdef __KERNEL__
+
+#include <linux/netfilter_ipv4/lockhelp.h>
+DECLARE_LOCK_EXTERN(ip_pptp_lock);
+
+#define IP_CONNTR_PPTP		PPTP_CONTROL_PORT
+
+union pptp_ctrl_union {
+	void				*rawreq;
+	struct PptpStartSessionRequest	*sreq;
+	struct PptpStartSessionReply	*srep;
+	struct PptpStopSessionRequest	*streq;
+	struct PptpStopSessionReply	*strep;
+	struct PptpOutCallRequest	*ocreq;
+	struct PptpOutCallReply		*ocack;
+	struct PptpInCallRequest	*icreq;
+	struct PptpInCallReply		*icack;
+	struct PptpInCallConnected	*iccon;
+	struct PptpClearCallRequest	*clrreq;
+	struct PptpCallDisconnectNotify	*disc;
+	struct PptpWanErrorNotify	*wanerr;
+	struct PptpSetLinkInfo		*setlink;
+};
+
+
+
+#define PPTP_CONTROL_PORT	1723
+
+#define PPTP_PACKET_CONTROL	1
+#define PPTP_PACKET_MGMT	2
+
+#define PPTP_MAGIC_COOKIE	0x1a2b3c4d
+
+struct pptp_pkt_hdr {
+	__u16	packetLength;
+	__u16	packetType;
+	__u32	magicCookie;
+};
+
+/* PptpControlMessageType values */
+#define PPTP_START_SESSION_REQUEST	1
+#define PPTP_START_SESSION_REPLY	2
+#define PPTP_STOP_SESSION_REQUEST	3
+#define PPTP_STOP_SESSION_REPLY		4
+#define PPTP_ECHO_REQUEST		5
+#define PPTP_ECHO_REPLY			6
+#define PPTP_OUT_CALL_REQUEST		7
+#define PPTP_OUT_CALL_REPLY		8
+#define PPTP_IN_CALL_REQUEST		9
+#define PPTP_IN_CALL_REPLY		10
+#define PPTP_IN_CALL_CONNECT		11
+#define PPTP_CALL_CLEAR_REQUEST		12
+#define PPTP_CALL_DISCONNECT_NOTIFY	13
+#define PPTP_WAN_ERROR_NOTIFY		14
+#define PPTP_SET_LINK_INFO		15
+
+#define PPTP_MSG_MAX			15
+
+/* PptpGeneralError values */
+#define PPTP_ERROR_CODE_NONE		0
+#define PPTP_NOT_CONNECTED		1
+#define PPTP_BAD_FORMAT			2
+#define PPTP_BAD_VALUE			3
+#define PPTP_NO_RESOURCE		4
+#define PPTP_BAD_CALLID			5
+#define PPTP_REMOVE_DEVICE_ERROR	6
+
+struct PptpControlHeader {
+	__u16	messageType;
+	__u16	reserved;
+};
+
+/* FramingCapability Bitmap Values */
+#define PPTP_FRAME_CAP_ASYNC	0x1
+#define PPTP_FRAME_CAP_SYNC	0x2
+
+/* BearerCapability Bitmap Values */
+#define PPTP_BEARER_CAP_ANALOG	0x1
+#define PPTP_BEARER_CAP_DIGITAL	0x2
+
+struct PptpStartSessionRequest {
+	__u16	protocolVersion;
+	__u8	reserved1;
+	__u8	reserved2;
+	__u32	framingCapability;
+	__u32	bearerCapability;
+	__u16	maxChannels;
+	__u16	firmwareRevision;
+	__u8	hostName[64];
+	__u8	vendorString[64];
+};
+
+/* PptpStartSessionResultCode Values */
+#define PPTP_START_OK			1
+#define PPTP_START_GENERAL_ERROR	2
+#define PPTP_START_ALREADY_CONNECTED	3
+#define PPTP_START_NOT_AUTHORIZED	4
+#define PPTP_START_UNKNOWN_PROTOCOL	5
+
+struct PptpStartSessionReply {
+	__u16	protocolVersion;
+	__u8	resultCode;
+	__u8	generalErrorCode;
+	__u32	framingCapability;
+	__u32	bearerCapability;
+	__u16	maxChannels;
+	__u16	firmwareRevision;
+	__u8	hostName[64];
+	__u8	vendorString[64];
+};
+
+/* PptpStopReasons */
+#define PPTP_STOP_NONE			1
+#define PPTP_STOP_PROTOCOL		2
+#define PPTP_STOP_LOCAL_SHUTDOWN	3
+
+struct PptpStopSessionRequest {
+	__u8	reason;
+};
+
+/* PptpStopSessionResultCode */
+#define PPTP_STOP_OK			1
+#define PPTP_STOP_GENERAL_ERROR		2
+
+struct PptpStopSessionReply {
+	__u8	resultCode;
+	__u8	generalErrorCode;
+};
+
+struct PptpEchoRequest {
+	__u32	identNumber;
+};
+
+/* PptpEchoReplyResultCode */
+#define PPTP_ECHO_OK			1
+#define PPTP_ECHO_GENERAL_ERROR		2
+
+struct PptpEchoReply {
+	__u32	identNumber;
+	__u8	resultCode;
+	__u8	generalErrorCode;
+	__u16	reserved;
+};
+
+/* PptpFramingType */
+#define PPTP_ASYNC_FRAMING		1
+#define PPTP_SYNC_FRAMING		2
+#define PPTP_DONT_CARE_FRAMING		3
+
+/* PptpCallBearerType */
+#define PPTP_ANALOG_TYPE		1
+#define PPTP_DIGITAL_TYPE		2
+#define PPTP_DONT_CARE_BEARER_TYPE	3
+
+struct PptpOutCallRequest {
+	__u16	callID;
+	__u16	callSerialNumber;
+	__u32	minBPS;
+	__u32	maxBPS;
+	__u32	bearerType;
+	__u32	framingType;
+	__u16	packetWindow;
+	__u16	packetProcDelay;
+	__u16	reserved1;
+	__u16	phoneNumberLength;
+	__u16	reserved2;
+	__u8	phoneNumber[64];
+	__u8	subAddress[64];
+};
+
+/* PptpCallResultCode */
+#define PPTP_OUTCALL_CONNECT		1
+#define PPTP_OUTCALL_GENERAL_ERROR	2
+#define PPTP_OUTCALL_NO_CARRIER		3
+#define PPTP_OUTCALL_BUSY		4
+#define PPTP_OUTCALL_NO_DIAL_TONE	5
+#define PPTP_OUTCALL_TIMEOUT		6
+#define PPTP_OUTCALL_DONT_ACCEPT	7
+
+struct PptpOutCallReply {
+	__u16	callID;
+	__u16	peersCallID;
+	__u8	resultCode;
+	__u8	generalErrorCode;
+	__u16	causeCode;
+	__u32	connectSpeed;
+	__u16	packetWindow;
+	__u16	packetProcDelay;
+	__u32	physChannelID;
+};
+
+struct PptpInCallRequest {
+	__u16	callID;
+	__u16	callSerialNumber;
+	__u32	callBearerType;
+	__u32	physChannelID;
+	__u16	dialedNumberLength;
+	__u16	dialingNumberLength;
+	__u8	dialedNumber[64];
+	__u8	dialingNumber[64];
+	__u8	subAddress[64];
+};
+
+/* PptpInCallResultCode */
+#define PPTP_INCALL_ACCEPT		1
+#define PPTP_INCALL_GENERAL_ERROR	2
+#define PPTP_INCALL_DONT_ACCEPT		3
+
+struct PptpInCallReply {
+	__u16	callID;
+	__u16	peersCallID;
+	__u8	resultCode;
+	__u8	generalErrorCode;
+	__u16	packetWindow;
+	__u16	packetProcDelay;
+	__u16	reserved;
+};
+
+struct PptpInCallConnected {
+	__u16	peersCallID;
+	__u16	reserved;
+	__u32	connectSpeed;
+	__u16	packetWindow;
+	__u16	packetProcDelay;
+	__u32	callFramingType;
+};
+
+struct PptpClearCallRequest {
+	__u16	callID;
+	__u16	reserved;
+};
+
+struct PptpCallDisconnectNotify {
+	__u16	callID;
+	__u8	resultCode;
+	__u8	generalErrorCode;
+	__u16	causeCode;
+	__u16	reserved;
+	__u8	callStatistics[128];
+};
+
+struct PptpWanErrorNotify {
+	__u16	peersCallID;
+	__u16	reserved;
+	__u32	crcErrors;
+	__u32	framingErrors;
+	__u32	hardwareOverRuns;
+	__u32	bufferOverRuns;
+	__u32	timeoutErrors;
+	__u32	alignmentErrors;
+};
+
+struct PptpSetLinkInfo {
+	__u16	peersCallID;
+	__u16	reserved;
+	__u32	sendAccm;
+	__u32	recvAccm;
+};
+
+
+struct pptp_priv_data {
+	__u16	call_id;
+	__u16	mcall_id;
+	__u16	pcall_id;
+};
+
+#endif /* __KERNEL__ */
+#endif /* _CONNTRACK_PPTP_H */
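For orientation, this is the wire framing the helper parses: every PPTP control
message begins with pptp_pkt_hdr followed by PptpControlHeader, and the helper
checks the magic cookie and packet type before dispatching on messageType. A
self-contained userspace sketch of that framing (constants copied from the header
above; the exact validation order is my assumption, not taken from the helper):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

#define PPTP_PACKET_CONTROL		1
#define PPTP_MAGIC_COOKIE		0x1a2b3c4d
#define PPTP_START_SESSION_REQUEST	1

/* wire format: all fields big-endian on the network */
struct pptp_pkt_hdr {
	uint16_t packetLength;
	uint16_t packetType;
	uint32_t magicCookie;
} __attribute__((packed));

struct PptpControlHeader {
	uint16_t messageType;
	uint16_t reserved;
} __attribute__((packed));

struct pptp_ctl_pkt {
	struct pptp_pkt_hdr hdr;
	struct PptpControlHeader ctl;
} __attribute__((packed));

int main(void)
{
	struct pptp_ctl_pkt p;

	p.hdr.packetLength = htons(sizeof(p));
	p.hdr.packetType   = htons(PPTP_PACKET_CONTROL);
	p.hdr.magicCookie  = htonl(PPTP_MAGIC_COOKIE);
	p.ctl.messageType  = htons(PPTP_START_SESSION_REQUEST);
	p.ctl.reserved     = 0;

	/* sanity checks before dispatching on the message type */
	if (ntohl(p.hdr.magicCookie) == PPTP_MAGIC_COOKIE &&
	    ntohs(p.hdr.packetType) == PPTP_PACKET_CONTROL)
		printf("control message, type %u, %u bytes\n",
		       ntohs(p.ctl.messageType), ntohs(p.hdr.packetLength));
	return 0;
}
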
diff -uNr linux_org/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h linux/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
--- linux_org/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h	1970-01-01 01:00:00.000000000 +0100
+++ linux/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h	2006-10-27 14:11:52.000000000 +0200
@@ -0,0 +1,123 @@
+#ifndef _CONNTRACK_PROTO_GRE_H
+#define _CONNTRACK_PROTO_GRE_H
+#include <asm/byteorder.h>
+
+/* GRE PROTOCOL HEADER */
+
+/* GRE Version field */
+#define GRE_VERSION_1701	0x0
+#define GRE_VERSION_PPTP	0x1
+
+/* GRE Protocol field */
+#define GRE_PROTOCOL_PPTP	0x880B
+
+/* GRE Flags */
+#define GRE_FLAG_C		0x80
+#define GRE_FLAG_R		0x40
+#define GRE_FLAG_K		0x20
+#define GRE_FLAG_S		0x10
+#define GRE_FLAG_A		0x80
+
+#define GRE_IS_C(f)	((f)&GRE_FLAG_C)
+#define GRE_IS_R(f)	((f)&GRE_FLAG_R)
+#define GRE_IS_K(f)	((f)&GRE_FLAG_K)
+#define GRE_IS_S(f)	((f)&GRE_FLAG_S)
+#define GRE_IS_A(f)	((f)&GRE_FLAG_A)
+
+/* GRE is a mess: Four different standards */
+struct gre_hdr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u16	rec:3,
+		srr:1,
+		seq:1,
+		key:1,
+		routing:1,
+		csum:1,
+		version:3,
+		reserved:4,
+		ack:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u16	csum:1,
+		routing:1,
+		key:1,
+		seq:1,
+		srr:1,
+		rec:3,
+		ack:1,
+		reserved:4,
+		version:3;
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+	__u16	protocol;
+};
+
+/* modified GRE header for PPTP */
+struct gre_hdr_pptp {
+	__u8  flags;		/* bitfield */
+	__u8  version;		/* should be GRE_VERSION_PPTP */
+	__u16 protocol;		/* should be GRE_PROTOCOL_PPTP */
+	__u16 payload_len;	/* size of ppp payload, not inc. gre header */
+	__u16 call_id;		/* peer's call_id for this session */
+	__u32 seq;		/* sequence number.  Present if S==1 */
+	__u32 ack;		/* seq number of highest packet received by */
+				/* sender in this session */
+};
+
+
+/* this is part of ip_conntrack */
+struct ip_ct_gre {
+	unsigned int stream_timeout;
+	unsigned int timeout;
+};
+
+/* this is part of ip_conntrack_expect */
+struct ip_ct_gre_expect {
+	struct ip_ct_gre_keymap *keymap_orig, *keymap_reply;
+};
+
+#ifdef __KERNEL__
+struct ip_conntrack_expect;
+
+/* structure for original <-> reply keymap */
+struct ip_ct_gre_keymap {
+	struct list_head list;
+
+	struct ip_conntrack_tuple tuple;
+};
+
+
+/* add new tuple->key_reply pair to keymap */
+int ip_ct_gre_keymap_add(struct ip_conntrack_expect *exp,
+			 struct ip_conntrack_tuple *t,
+			 int reply);
+
+/* change an existing keymap entry */
+void ip_ct_gre_keymap_change(struct ip_ct_gre_keymap *km,
+			     struct ip_conntrack_tuple *t);
+
+/* delete keymap entries */
+void ip_ct_gre_keymap_destroy(struct ip_conntrack_expect *exp);
+
+
+/* get pointer to gre key, if present; note the byte-wise pointer
+ * arithmetic: the optional checksum+offset word (4 bytes, present if
+ * C or R is set) sits between the base header and the key */
+static inline u_int32_t *gre_key(struct gre_hdr *greh)
+{
+	if (!greh->key)
+		return NULL;
+	if (greh->csum || greh->routing)
+		return (u_int32_t *)((char *)greh + sizeof(*greh) + 4);
+	return (u_int32_t *)((char *)greh + sizeof(*greh));
+}
+
+/* get pointer to gre csum, if present */
+static inline u_int16_t *gre_csum(struct gre_hdr *greh)
+{
+	if (!greh->csum)
+		return NULL;
+	return (u_int16_t *)((char *)greh + sizeof(*greh));
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _CONNTRACK_PROTO_GRE_H */
diff -uNr linux_org/include/linux/netfilter_ipv4/ip_conntrack_tuple.h linux/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
--- linux_org/include/linux/netfilter_ipv4/ip_conntrack_tuple.h	2003-11-17 02:07:46.000000000 +0100
+++ linux/include/linux/netfilter_ipv4/ip_conntrack_tuple.h	2006-10-27 14:11:52.000000000 +0200
@@ -14,7 +14,7 @@
 union ip_conntrack_manip_proto
 {
 	/* Add other protocols here. */
-	u_int16_t all;
+	u_int32_t all;
 
 	struct {
 		u_int16_t port;
@@ -25,6 +25,9 @@
 	struct {
 		u_int16_t id;
 	} icmp;
+	struct {
+		u_int32_t key;
+	} gre;
 };
 
 /* The manipulable part of the tuple. */
@@ -44,7 +47,7 @@
 		u_int32_t ip;
 		union {
 			/* Add other protocols here. */
-			u_int16_t all;
+			u_int64_t all;
 
 			struct {
 				u_int16_t port;
@@ -55,6 +58,11 @@
 			struct {
 				u_int8_t type, code;
 			} icmp;
+			struct {
+				u_int16_t protocol;
+				u_int8_t version;
+				u_int32_t key;
+			} gre;
 		} u;
 
 		/* The protocol. */
@@ -80,10 +88,16 @@
 #ifdef __KERNEL__
 
 #define DUMP_TUPLE(tp)						\
-DEBUGP("tuple %p: %u %u.%u.%u.%u:%hu -> %u.%u.%u.%u:%hu\n",	\
+DEBUGP("tuple %p: %u %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u\n",	\
 	(tp), (tp)->dst.protonum,				\
-	NIPQUAD((tp)->src.ip), ntohs((tp)->src.u.all),		\
-	NIPQUAD((tp)->dst.ip), ntohs((tp)->dst.u.all))
+	NIPQUAD((tp)->src.ip), ntohl((tp)->src.u.all),		\
+	NIPQUAD((tp)->dst.ip), ntohl((tp)->dst.u.all))
+
+#define DUMP_TUPLE_RAW(x)						\
+	DEBUGP("tuple %p: %u %u.%u.%u.%u:0x%08x -> %u.%u.%u.%u:0x%08x\n",\
+	       (x), (x)->dst.protonum,					\
+	       NIPQUAD((x)->src.ip), ntohl((x)->src.u.all),		\
+	       NIPQUAD((x)->dst.ip), ntohl((x)->dst.u.all))
 
 #define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
 
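Widening u.all is what lets the 32-bit GRE key take part in generic tuple
comparison, hashing, and masking: with the old 16-bit all, two tuples differing
only in the upper half of the key would compare equal. A small userspace sketch
of exactly that failure (illustrative types only; the equality result for the
16-bit case assumes a little-endian machine):

#include <stdint.h>
#include <stdio.h>

/* before the patch 'all' was u_int16_t; model both layouts */
union manip_old { uint16_t all; struct { uint32_t key; } gre; };
union manip_new { uint32_t all; struct { uint32_t key; } gre; };

int main(void)
{
	union manip_old a = { .gre.key = 0x00010000 };
	union manip_old b = { .gre.key = 0x00020000 };
	union manip_new c = { .gre.key = 0x00010000 };
	union manip_new d = { .gre.key = 0x00020000 };

	/* 16-bit 'all' sees only the low half of the key: keys collide */
	printf("16-bit all: distinct keys compare equal? %d\n", a.all == b.all);
	/* 32-bit 'all' covers the whole key: keys stay distinct */
	printf("32-bit all: distinct keys compare equal? %d\n", c.all == d.all);
	return 0;
}
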
diff -uNr linux_org/include/linux/netfilter_ipv4/ip_nat_pptp.h linux/include/linux/netfilter_ipv4/ip_nat_pptp.h
--- linux_org/include/linux/netfilter_ipv4/ip_nat_pptp.h	1970-01-01 01:00:00.000000000 +0100
+++ linux/include/linux/netfilter_ipv4/ip_nat_pptp.h	2006-10-27 14:11:52.000000000 +0200
@@ -0,0 +1,11 @@
+/* PPTP constants and structs */
+#ifndef _NAT_PPTP_H
+#define _NAT_PPTP_H
+
+/* conntrack private data */
+struct ip_nat_pptp {
+	u_int16_t pns_call_id;		/* NAT'ed PNS call id */
+	u_int16_t pac_call_id;		/* NAT'ed PAC call id */
+};
+
+#endif /* _NAT_PPTP_H */
diff -uNr linux_org/net/ipv4/netfilter/Config.in linux/net/ipv4/netfilter/Config.in
--- linux_org/net/ipv4/netfilter/Config.in	2003-08-13 19:19:30.000000000 +0200
+++ linux/net/ipv4/netfilter/Config.in	2006-10-27 14:11:52.000000000 +0200
@@ -7,6 +7,11 @@
 tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP_NF_CONNTRACK
 if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then
   dep_tristate '  FTP protocol support' CONFIG_IP_NF_FTP $CONFIG_IP_NF_CONNTRACK
+  dep_tristate '  GRE protocol support' CONFIG_IP_NF_CT_PROTO_GRE $CONFIG_IP_NF_CONNTRACK
+  dep_tristate '  PPTP protocol support' CONFIG_IP_NF_PPTP $CONFIG_IP_NF_CONNTRACK
+  if [ "$CONFIG_IP_NF_PPTP" != "n" ]; then
+    bool '    PPTP verbose debug' CONFIG_IP_NF_PPTP_DEBUG
+  fi
   dep_tristate '  Amanda protocol support' CONFIG_IP_NF_AMANDA $CONFIG_IP_NF_CONNTRACK
   dep_tristate '  TFTP protocol support' CONFIG_IP_NF_TFTP $CONFIG_IP_NF_CONNTRACK
   dep_tristate '  IRC protocol support' CONFIG_IP_NF_IRC $CONFIG_IP_NF_CONNTRACK
@@ -67,6 +72,20 @@
     fi
   fi
   bool '  NAT of local connections (READ HELP)' CONFIG_IP_NF_NAT_LOCAL
+  if [ "$CONFIG_IP_NF_PPTP" = "m" ]; then
+    define_tristate CONFIG_IP_NF_NAT_PPTP m
+  else
+    if [ "$CONFIG_IP_NF_PPTP" = "y" ]; then
+      define_tristate CONFIG_IP_NF_NAT_PPTP $CONFIG_IP_NF_NAT
+    fi
+  fi
+  if [ "$CONFIG_IP_NF_CT_PROTO_GRE" = "m" ]; then
+    define_tristate CONFIG_IP_NF_NAT_PROTO_GRE m
+  else
+    if [ "$CONFIG_IP_NF_CT_PROTO_GRE" = "y" ]; then
+      define_tristate CONFIG_IP_NF_NAT_PROTO_GRE $CONFIG_IP_NF_NAT
+    fi
+  fi
   if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
     dep_tristate '  Basic SNMP-ALG support (EXPERIMENTAL)' CONFIG_IP_NF_NAT_SNMP_BASIC $CONFIG_IP_NF_NAT
   fi
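The define_tristate blocks above clamp each NAT half to 'm' whenever its
conntrack half is modular, and let it follow CONFIG_IP_NF_NAT when the conntrack
half is built in. A throwaway C sketch of that resolution rule (enum and names
invented for illustration, not part of the config language):

#include <stdio.h>

/* tristate values as in the 2.4 config language */
enum tristate { N, M, Y };

/* NAT helper is a module if its conntrack part is a module;
 * otherwise it follows CONFIG_IP_NF_NAT (mirrors the Config.in rule) */
static enum tristate nat_helper(enum tristate ct_part, enum tristate nat)
{
	if (ct_part == M)
		return M;
	if (ct_part == Y)
		return nat;
	return N;
}

int main(void)
{
	const char *s[] = { "n", "m", "y" };

	printf("PPTP=m, NAT=y -> NAT_PPTP=%s\n", s[nat_helper(M, Y)]);
	printf("PPTP=y, NAT=m -> NAT_PPTP=%s\n", s[nat_helper(Y, M)]);
	return 0;
}
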
diff -uNr linux_org/net/ipv4/netfilter/Makefile linux/net/ipv4/netfilter/Makefile
--- linux_org/net/ipv4/netfilter/Makefile	2003-08-13 19:19:30.000000000 +0200
+++ linux/net/ipv4/netfilter/Makefile	2006-10-27 14:11:52.000000000 +0200
@@ -30,8 +30,21 @@
 
 # connection tracking
 obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+
+# connection tracking protocol helpers
+obj-$(CONFIG_IP_NF_CT_PROTO_GRE) += ip_conntrack_proto_gre.o
+ifdef CONFIG_IP_NF_CT_PROTO_GRE
+	export-objs += ip_conntrack_proto_gre.o
+endif
+
+# NAT protocol helpers
+obj-$(CONFIG_IP_NF_NAT_PROTO_GRE) += ip_nat_proto_gre.o
 
 # connection tracking helpers
+obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o
+ifdef CONFIG_IP_NF_NAT_PPTP
+	export-objs += ip_conntrack_pptp.o
+endif
 obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
 ifdef CONFIG_IP_NF_AMANDA
 	export-objs += ip_conntrack_amanda.o
@@ -49,6 +62,7 @@
 endif
 
 # NAT helpers
+obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
 obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
 obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
 obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
diff -uNr linux_org/net/ipv4/netfilter/ip_conntrack_core.c linux/net/ipv4/netfilter/ip_conntrack_core.c
--- linux_org/net/ipv4/netfilter/ip_conntrack_core.c	2004-11-24 12:14:04.000000000 +0100
+++ linux/net/ipv4/netfilter/ip_conntrack_core.c	2006-10-27 14:11:52.000000000 +0200
@@ -142,6 +142,8 @@
 	tuple->dst.ip = iph->daddr;
 	tuple->dst.protonum = iph->protocol;
 
+	tuple->src.u.all = tuple->dst.u.all = 0;
+
 	ret = protocol->pkt_to_tuple((u_int32_t *)iph + iph->ihl,
 				     len - 4*iph->ihl,
 				     tuple);
@@ -157,6 +159,8 @@
 	inverse->dst.ip = orig->src.ip;
 	inverse->dst.protonum = orig->dst.protonum;
 
+	inverse->src.u.all = inverse->dst.u.all = 0;
+
 	return protocol->invert_tuple(inverse, orig);
 }
 
@@ -945,8 +949,8 @@
 	 * so there is no need to use the tuple lock too */
 
 	DEBUGP("ip_conntrack_expect_related %p\n", related_to);
-	DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
-	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);
+	DEBUGP("tuple: "); DUMP_TUPLE_RAW(&expect->tuple);
+	DEBUGP("mask:  "); DUMP_TUPLE_RAW(&expect->mask);
 
 	old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
 			struct ip_conntrack_expect *, &expect->tuple,
@@ -1063,15 +1067,14 @@
 
 	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
 	WRITE_LOCK(&ip_conntrack_expect_tuple_lock);
-	DEBUGP("change_expect:\n");
-	DEBUGP("exp tuple: "); DUMP_TUPLE(&expect->tuple);
-	DEBUGP("exp mask:  "); DUMP_TUPLE(&expect->mask);
-	DEBUGP("newtuple:  "); DUMP_TUPLE(newtuple);
+	DEBUGP("exp tuple: "); DUMP_TUPLE_RAW(&expect->tuple);
+	DEBUGP("exp mask:  "); DUMP_TUPLE_RAW(&expect->mask);
+	DEBUGP("newtuple:  "); DUMP_TUPLE_RAW(newtuple);
 	if (expect->ct_tuple.dst.protonum == 0) {
 		/* Never seen before */
 		DEBUGP("change expect: never seen before\n");
-		if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
+		if (!ip_ct_tuple_mask_cmp(&expect->tuple, newtuple, &expect->mask)
 		    && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
 				 struct ip_conntrack_expect *, newtuple, &expect->mask)) {
 			/* Force NAT to find an unused tuple */
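The two added "u.all = 0" assignments in get_tuple() and invert_tuple() are easy
to miss but load-bearing: the per-protocol pkt_to_tuple()/invert_tuple() handlers
for TCP, UDP, and ICMP write only 16 bits of the now-wider u.all, so whatever was
previously on the stack would otherwise leak into tuple hashing and comparison. A
userspace sketch of that failure mode (names hypothetical):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

union manip { uint32_t all; struct { uint16_t port; } tcp; };

/* mimics a protocol's pkt_to_tuple(): fills only the 16-bit port */
static void pkt_to_tuple(union manip *u, uint16_t port)
{
	u->tcp.port = port;		/* upper 16 bits left untouched */
}

int main(void)
{
	union manip a, b;

	memset(&a, 0xAA, sizeof(a));	/* simulate stale stack garbage */
	memset(&b, 0x55, sizeof(b));
	pkt_to_tuple(&a, 80);
	pkt_to_tuple(&b, 80);
	printf("without zeroing: equal=%d\n", a.all == b.all);	/* 0: bug */

	a.all = b.all = 0;		/* the reset added in get_tuple() */
	pkt_to_tuple(&a, 80);
	pkt_to_tuple(&b, 80);
	printf("with zeroing:    equal=%d\n", a.all == b.all);	/* 1 */
	return 0;
}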
+ * + * 23 Apr 2001: Harald Welte <laforge@gnumonks.org> + * - new API and handling of conntrack/nat helpers + * - now capable of multiple expectations for one master + * 16 Jul 2002: Harald Welte <laforge@gnumonks.org> + * - add usage/reference counts to ip_conntrack_expect + * - export ip_conntrack[_expect]_{find_get,put} functions + * */ + +#include <linux/version.h> +#include <linux/config.h> +#include <linux/types.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <linux/vmalloc.h> +#include <linux/brlock.h> +#include <net/checksum.h> +#include <linux/stddef.h> +#include <linux/sysctl.h> +#include <linux/slab.h> +#include <linux/random.h> +#include <linux/jhash.h> +/* For ERR_PTR(). Yeah, I know... --RR */ +#include <linux/fs.h> + +/* This rwlock protects the main hash table, protocol/helper/expected + registrations, conntrack timers*/ +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) + +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> +#include <linux/netfilter_ipv4/listhelp.h> + +#define IP_CONNTRACK_VERSION "2.1" + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +DECLARE_RWLOCK(ip_conntrack_lock); +DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock); + +void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; +LIST_HEAD(ip_conntrack_expect_list); +LIST_HEAD(protocol_list); +static LIST_HEAD(helpers); +unsigned int ip_conntrack_htable_size = 0; +int ip_conntrack_max = 0; +static atomic_t ip_conntrack_count = ATOMIC_INIT(0); +struct list_head *ip_conntrack_hash; +static kmem_cache_t *ip_conntrack_cachep; + +extern struct ip_conntrack_protocol ip_conntrack_generic_protocol; + +static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr, + u_int8_t protocol) +{ + return protocol == curr->proto; +} + +struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol) +{ + struct ip_conntrack_protocol *p; + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + p = LIST_FIND(&protocol_list, proto_cmpfn, + struct ip_conntrack_protocol *, protocol); + if (!p) + p = &ip_conntrack_generic_protocol; + + return p; +} + +struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol) +{ + struct ip_conntrack_protocol *p; + + READ_LOCK(&ip_conntrack_lock); + p = __ip_ct_find_proto(protocol); + READ_UNLOCK(&ip_conntrack_lock); + return p; +} + +inline void +ip_conntrack_put(struct ip_conntrack *ct) +{ + IP_NF_ASSERT(ct); + IP_NF_ASSERT(ct->infos[0].master); + /* nf_conntrack_put wants to go via an info struct, so feed it + one at random. 
*/ + nf_conntrack_put(&ct->infos[0]); +} + +static int ip_conntrack_hash_rnd_initted; +static unsigned int ip_conntrack_hash_rnd; + +static u_int32_t +hash_conntrack(const struct ip_conntrack_tuple *tuple) +{ +#if 0 + dump_tuple(tuple); +#endif + return (jhash_3words(tuple->src.ip, + (tuple->dst.ip ^ tuple->dst.protonum), + (tuple->src.u.all | (tuple->dst.u.all << 16)), + ip_conntrack_hash_rnd) % ip_conntrack_htable_size); +} + +inline int +get_tuple(const struct iphdr *iph, size_t len, + struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *protocol) +{ + int ret; + + /* Never happen */ + if (iph->frag_off & htons(IP_OFFSET)) { + printk("ip_conntrack_core: Frag of proto %u.\n", + iph->protocol); + return 0; + } + /* Guarantee 8 protocol bytes: if more wanted, use len param */ + else if (iph->ihl * 4 + 8 > len) + return 0; + + tuple->src.ip = iph->saddr; + tuple->dst.ip = iph->daddr; + tuple->dst.protonum = iph->protocol; + + ret = protocol->pkt_to_tuple((u_int32_t *)iph + iph->ihl, + len - 4*iph->ihl, + tuple); + return ret; +} + +static int +invert_tuple(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig, + const struct ip_conntrack_protocol *protocol) +{ + inverse->src.ip = orig->dst.ip; + inverse->dst.ip = orig->src.ip; + inverse->dst.protonum = orig->dst.protonum; + + return protocol->invert_tuple(inverse, orig); +} + + +/* ip_conntrack_expect helper functions */ + +/* Compare tuple parts depending on mask. */ +static inline int expect_cmp(const struct ip_conntrack_expect *i, + const struct ip_conntrack_tuple *tuple) +{ + MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock); + return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask); +} + +static void +destroy_expect(struct ip_conntrack_expect *exp) +{ + DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use)); + IP_NF_ASSERT(atomic_read(&exp->use) == 0); + IP_NF_ASSERT(!timer_pending(&exp->timeout)); + + kfree(exp); +} + +inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp) +{ + IP_NF_ASSERT(exp); + + if (atomic_dec_and_test(&exp->use)) { + /* usage count dropped to zero */ + destroy_expect(exp); + } +} + +static inline struct ip_conntrack_expect * +__ip_ct_expect_find(const struct ip_conntrack_tuple *tuple) +{ + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock); + return LIST_FIND(&ip_conntrack_expect_list, expect_cmp, + struct ip_conntrack_expect *, tuple); +} + +/* Find a expectation corresponding to a tuple. */ +struct ip_conntrack_expect * +ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_expect *exp; + + READ_LOCK(&ip_conntrack_lock); + READ_LOCK(&ip_conntrack_expect_tuple_lock); + exp = __ip_ct_expect_find(tuple); + if (exp) + atomic_inc(&exp->use); + READ_UNLOCK(&ip_conntrack_expect_tuple_lock); + READ_UNLOCK(&ip_conntrack_lock); + + return exp; +} + +/* remove one specific expectation from all lists and drop refcount, + * does _NOT_ delete the timer. */ +static void __unexpect_related(struct ip_conntrack_expect *expect) +{ + DEBUGP("unexpect_related(%p)\n", expect); + MUST_BE_WRITE_LOCKED(&ip_conntrack_lock); + + /* we're not allowed to unexpect a confirmed expectation! 
*/ + IP_NF_ASSERT(!expect->sibling); + + /* delete from global and local lists */ + list_del(&expect->list); + list_del(&expect->expected_list); + + /* decrement expect-count of master conntrack */ + if (expect->expectant) + expect->expectant->expecting--; + + ip_conntrack_expect_put(expect); +} + +/* remove one specific expecatation from all lists, drop refcount + * and expire timer. + * This function can _NOT_ be called for confirmed expects! */ +static void unexpect_related(struct ip_conntrack_expect *expect) +{ + IP_NF_ASSERT(expect->expectant); + IP_NF_ASSERT(expect->expectant->helper); + /* if we are supposed to have a timer, but we can't delete + * it: race condition. __unexpect_related will + * be calledd by timeout function */ + if (expect->expectant->helper->timeout + && !del_timer(&expect->timeout)) + return; + + __unexpect_related(expect); +} + +/* delete all unconfirmed expectations for this conntrack */ +static void remove_expectations(struct ip_conntrack *ct, int drop_refcount) +{ + struct list_head *exp_entry, *next; + struct ip_conntrack_expect *exp; + + DEBUGP("remove_expectations(%p)\n", ct); + + list_for_each_safe(exp_entry, next, &ct->sibling_list) { + exp = list_entry(exp_entry, struct ip_conntrack_expect, + expected_list); + + /* we skip established expectations, as we want to delete + * the un-established ones only */ + if (exp->sibling) { + DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct); + if (drop_refcount) { + /* Indicate that this expectations parent is dead */ + ip_conntrack_put(exp->expectant); + exp->expectant = NULL; + } + continue; + } + + IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp)); + IP_NF_ASSERT(exp->expectant == ct); + + /* delete expectation from global and private lists */ + unexpect_related(exp); + } +} + +static void +clean_from_lists(struct ip_conntrack *ct) +{ + unsigned int ho, hr; + + DEBUGP("clean_from_lists(%p)\n", ct); + MUST_BE_WRITE_LOCKED(&ip_conntrack_lock); + + ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); + LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); + + /* Destroy all un-established, pending expectations */ + remove_expectations(ct, 1); +} + +static void +destroy_conntrack(struct nf_conntrack *nfct) +{ + struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL; + struct ip_conntrack_protocol *proto; + + DEBUGP("destroy_conntrack(%p)\n", ct); + IP_NF_ASSERT(atomic_read(&nfct->use) == 0); + IP_NF_ASSERT(!timer_pending(&ct->timeout)); + + /* To make sure we don't get any weird locking issues here: + * destroy_conntrack() MUST NOT be called with a write lock + * to ip_conntrack_lock!!! 
-HW */ + proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); + if (proto && proto->destroy) + proto->destroy(ct); + + if (ip_conntrack_destroyed) + ip_conntrack_destroyed(ct); + + WRITE_LOCK(&ip_conntrack_lock); + /* Make sure don't leave any orphaned expectations lying around */ + if (ct->expecting) + remove_expectations(ct, 1); + + /* Delete our master expectation */ + if (ct->master) { + if (ct->master->expectant) { + /* can't call __unexpect_related here, + * since it would screw up expect_list */ + list_del(&ct->master->expected_list); + master = ct->master->expectant; + } + kfree(ct->master); + } + WRITE_UNLOCK(&ip_conntrack_lock); + + if (master) + ip_conntrack_put(master); + + DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); + kmem_cache_free(ip_conntrack_cachep, ct); + atomic_dec(&ip_conntrack_count); +} + +static void death_by_timeout(unsigned long ul_conntrack) +{ + struct ip_conntrack *ct = (void *)ul_conntrack; + + WRITE_LOCK(&ip_conntrack_lock); + clean_from_lists(ct); + WRITE_UNLOCK(&ip_conntrack_lock); + ip_conntrack_put(ct); +} + +static inline int +conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + return i->ctrack != ignored_conntrack + && ip_ct_tuple_equal(tuple, &i->tuple); +} + +static struct ip_conntrack_tuple_hash * +__ip_conntrack_find(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + struct ip_conntrack_tuple_hash *h; + unsigned int hash = hash_conntrack(tuple); + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + h = LIST_FIND(&ip_conntrack_hash[hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + tuple, ignored_conntrack); + return h; +} + +/* Find a connection corresponding to a tuple. */ +struct ip_conntrack_tuple_hash * +ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + struct ip_conntrack_tuple_hash *h; + + READ_LOCK(&ip_conntrack_lock); + h = __ip_conntrack_find(tuple, ignored_conntrack); + if (h) + atomic_inc(&h->ctrack->ct_general.use); + READ_UNLOCK(&ip_conntrack_lock); + + return h; +} + +static inline struct ip_conntrack * +__ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo) +{ + struct ip_conntrack *ct + = (struct ip_conntrack *)nfct->master; + + /* ctinfo is the index of the nfct inside the conntrack */ + *ctinfo = nfct - ct->infos; + IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER); + return ct; +} + +/* Return conntrack and conntrack_info given skb->nfct->master */ +struct ip_conntrack * +ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo) +{ + if (skb->nfct) + return __ip_conntrack_get(skb->nfct, ctinfo); + return NULL; +} + +/* Confirm a connection given skb->nfct; places it in hash table */ +int +__ip_conntrack_confirm(struct nf_ct_info *nfct) +{ + unsigned int hash, repl_hash; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + + ct = __ip_conntrack_get(nfct, &ctinfo); + + /* ipt_REJECT uses ip_conntrack_attach to attach related + ICMP/TCP RST packets in other direction. Actual packet + which created connection will be IP_CT_NEW or for an + expected connection, IP_CT_RELATED. 
*/ + if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) + return NF_ACCEPT; + + hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + /* We're not in hash table, and we refuse to set up related + connections for unconfirmed conns. But packet copies and + REJECT will give spurious warnings here. */ + /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ + + /* No external references means noone else could have + confirmed us. */ + IP_NF_ASSERT(!is_confirmed(ct)); + DEBUGP("Confirming conntrack %p\n", ct); + + WRITE_LOCK(&ip_conntrack_lock); + /* See if there's one in the list already, including reverse: + NAT could have grabbed it without realizing, since we're + not in the hash. If there is, we lost race. */ + if (!LIST_FIND(&ip_conntrack_hash[hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) + && !LIST_FIND(&ip_conntrack_hash[repl_hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { + list_prepend(&ip_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL]); + list_prepend(&ip_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY]); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + WRITE_UNLOCK(&ip_conntrack_lock); + return NF_ACCEPT; + } + + WRITE_UNLOCK(&ip_conntrack_lock); + return NF_DROP; +} + +/* Returns true if a connection correspondings to the tuple (required + for NAT). */ +int +ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + struct ip_conntrack_tuple_hash *h; + + READ_LOCK(&ip_conntrack_lock); + h = __ip_conntrack_find(tuple, ignored_conntrack); + READ_UNLOCK(&ip_conntrack_lock); + + return h != NULL; +} + +/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ +struct ip_conntrack * +icmp_error_track(struct sk_buff *skb, + enum ip_conntrack_info *ctinfo, + unsigned int hooknum) +{ + const struct iphdr *iph = skb->nh.iph; + struct icmphdr *hdr; + struct ip_conntrack_tuple innertuple, origtuple; + struct iphdr *inner; + size_t datalen; + struct ip_conntrack_protocol *innerproto; + struct ip_conntrack_tuple_hash *h; + + IP_NF_ASSERT(iph->protocol == IPPROTO_ICMP); + IP_NF_ASSERT(skb->nfct == NULL); + + hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl); + inner = (struct iphdr *)(hdr + 1); + datalen = skb->len - iph->ihl*4 - sizeof(*hdr); + + if (skb->len < iph->ihl * 4 + sizeof(*hdr) + sizeof(*iph)) { + DEBUGP("icmp_error_track: too short\n"); + return NULL; + } + + if (hdr->type != ICMP_DEST_UNREACH + && hdr->type != ICMP_SOURCE_QUENCH + && hdr->type != ICMP_TIME_EXCEEDED + && hdr->type != ICMP_PARAMETERPROB + && hdr->type != ICMP_REDIRECT) + return NULL; + + /* Ignore ICMP's containing fragments (shouldn't happen) */ + if (inner->frag_off & htons(IP_OFFSET)) { + DEBUGP("icmp_error_track: fragment of proto %u\n", + inner->protocol); + return NULL; + } + + /* Ignore it if the checksum's bogus. */ + if (ip_compute_csum((unsigned char *)hdr, sizeof(*hdr) + datalen)) { + DEBUGP("icmp_error_track: bad csum\n"); + return NULL; + } + + innerproto = ip_ct_find_proto(inner->protocol); + /* Are they talking about one of our connections? 
*/ + if (inner->ihl * 4 + 8 > datalen + || !get_tuple(inner, datalen, &origtuple, innerproto)) { + DEBUGP("icmp_error: ! get_tuple p=%u (%u*4+%u dlen=%u)\n", + inner->protocol, inner->ihl, 8, + datalen); + return NULL; + } + + /* Ordinarily, we'd expect the inverted tupleproto, but it's + been preserved inside the ICMP. */ + if (!invert_tuple(&innertuple, &origtuple, innerproto)) { + DEBUGP("icmp_error_track: Can't invert tuple\n"); + return NULL; + } + + *ctinfo = IP_CT_RELATED; + + h = ip_conntrack_find_get(&innertuple, NULL); + if (!h) { + /* Locally generated ICMPs will match inverted if they + haven't been SNAT'ed yet */ + /* FIXME: NAT code has to handle half-done double NAT --RR */ + if (hooknum == NF_IP_LOCAL_OUT) + h = ip_conntrack_find_get(&origtuple, NULL); + + if (!h) { + DEBUGP("icmp_error_track: no match\n"); + return NULL; + } + /* Reverse direction from that found */ + if (DIRECTION(h) != IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; + } else { + if (DIRECTION(h) == IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; + } + + /* Update skb to refer to this connection */ + skb->nfct = &h->ctrack->infos[*ctinfo]; + return h->ctrack; +} + +/* There's a small race here where we may free a just-assured + connection. Too bad: we're in trouble anyway. */ +static inline int unreplied(const struct ip_conntrack_tuple_hash *i) +{ + return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status)); +} + +static int early_drop(struct list_head *chain) +{ + /* Traverse backwards: gives us oldest, which is roughly LRU */ + struct ip_conntrack_tuple_hash *h; + int dropped = 0; + + READ_LOCK(&ip_conntrack_lock); + h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); + if (h) + atomic_inc(&h->ctrack->ct_general.use); + READ_UNLOCK(&ip_conntrack_lock); + + if (!h) + return dropped; + + if (del_timer(&h->ctrack->timeout)) { + death_by_timeout((unsigned long)h->ctrack); + dropped = 1; + } + ip_conntrack_put(h->ctrack); + return dropped; +} + +static inline int helper_cmp(const struct ip_conntrack_helper *i, + const struct ip_conntrack_tuple *rtuple) +{ + return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); +} + +struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple) +{ + return LIST_FIND(&helpers, helper_cmp, + struct ip_conntrack_helper *, + tuple); +} + +/* Allocate a new conntrack: we return -ENOMEM if classification + failed due to stress. Otherwise it really is unclassifiable. */ +static struct ip_conntrack_tuple_hash * +init_conntrack(const struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *protocol, + struct sk_buff *skb) +{ + struct ip_conntrack *conntrack; + struct ip_conntrack_tuple repl_tuple; + size_t hash; + struct ip_conntrack_expect *expected; + int i; + static unsigned int drop_next = 0; + + if (!ip_conntrack_hash_rnd_initted) { + get_random_bytes(&ip_conntrack_hash_rnd, 4); + ip_conntrack_hash_rnd_initted = 1; + } + + hash = hash_conntrack(tuple); + + if (ip_conntrack_max && + atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { + /* Try dropping from random chain, or else from the + chain about to put into (in case they're trying to + bomb one hash chain). 
*/ + unsigned int next = (drop_next++)%ip_conntrack_htable_size; + + if (!early_drop(&ip_conntrack_hash[next]) + && !early_drop(&ip_conntrack_hash[hash])) { + if (net_ratelimit()) + printk(KERN_WARNING + "ip_conntrack: table full, dropping" + " packet.\n"); + return ERR_PTR(-ENOMEM); + } + } + + if (!invert_tuple(&repl_tuple, tuple, protocol)) { + DEBUGP("Can't invert tuple.\n"); + return NULL; + } + + conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); + if (!conntrack) { + DEBUGP("Can't allocate conntrack.\n"); + return ERR_PTR(-ENOMEM); + } + + memset(conntrack, 0, sizeof(*conntrack)); + atomic_set(&conntrack->ct_general.use, 1); + conntrack->ct_general.destroy = destroy_conntrack; + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple; + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack; + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; + conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack; + for (i=0; i < IP_CT_NUMBER; i++) + conntrack->infos[i].master = &conntrack->ct_general; + + if (!protocol->new(conntrack, skb->nh.iph, skb->len)) { + kmem_cache_free(ip_conntrack_cachep, conntrack); + return NULL; + } + /* Don't set timer yet: wait for confirmation */ + init_timer(&conntrack->timeout); + conntrack->timeout.data = (unsigned long)conntrack; + conntrack->timeout.function = death_by_timeout; + + INIT_LIST_HEAD(&conntrack->sibling_list); + + WRITE_LOCK(&ip_conntrack_lock); + /* Need finding and deleting of expected ONLY if we win race */ + READ_LOCK(&ip_conntrack_expect_tuple_lock); + expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp, + struct ip_conntrack_expect *, tuple); + READ_UNLOCK(&ip_conntrack_expect_tuple_lock); + + /* If master is not in hash table yet (ie. packet hasn't left + this machine yet), how can other end know about expected? + Hence these are not the droids you are looking for (if + master ct never got confirmed, we'd hold a reference to it + and weird things would happen to future packets). */ + if (expected && !is_confirmed(expected->expectant)) + expected = NULL; + + /* Look up the conntrack helper for master connections only */ + if (!expected) + conntrack->helper = ip_ct_find_helper(&repl_tuple); + + /* If the expectation is dying, then this is a looser. */ + if (expected + && expected->expectant->helper->timeout + && ! del_timer(&expected->timeout)) + expected = NULL; + + if (expected) { + DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", + conntrack, expected); + /* Welcome, Mr. Bond. We've been expecting you... 
*/ + __set_bit(IPS_EXPECTED_BIT, &conntrack->status); + conntrack->master = expected; + expected->sibling = conntrack; + LIST_DELETE(&ip_conntrack_expect_list, expected); + expected->expectant->expecting--; + nf_conntrack_get(&master_ct(conntrack)->infos[0]); + } + atomic_inc(&ip_conntrack_count); + WRITE_UNLOCK(&ip_conntrack_lock); + + if (expected && expected->expectfn) + expected->expectfn(conntrack); + return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; +} + +/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ +static inline struct ip_conntrack * +resolve_normal_ct(struct sk_buff *skb, + struct ip_conntrack_protocol *proto, + int *set_reply, + unsigned int hooknum, + enum ip_conntrack_info *ctinfo) +{ + struct ip_conntrack_tuple tuple; + struct ip_conntrack_tuple_hash *h; + + IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0); + + if (!get_tuple(skb->nh.iph, skb->len, &tuple, proto)) + return NULL; + + /* look for tuple match */ + h = ip_conntrack_find_get(&tuple, NULL); + if (!h) { + h = init_conntrack(&tuple, proto, skb); + if (!h) + return NULL; + if (IS_ERR(h)) + return (void *)h; + } + + /* It exists; we have (non-exclusive) reference. */ + if (DIRECTION(h) == IP_CT_DIR_REPLY) { + *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; + /* Please set reply bit if this packet OK */ + *set_reply = 1; + } else { + /* Once we've had two way comms, always ESTABLISHED. */ + if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) { + DEBUGP("ip_conntrack_in: normal packet for %p\n", + h->ctrack); + *ctinfo = IP_CT_ESTABLISHED; + } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) { + DEBUGP("ip_conntrack_in: related packet for %p\n", + h->ctrack); + *ctinfo = IP_CT_RELATED; + } else { + DEBUGP("ip_conntrack_in: new packet for %p\n", + h->ctrack); + *ctinfo = IP_CT_NEW; + } + *set_reply = 0; + } + skb->nfct = &h->ctrack->infos[*ctinfo]; + return h->ctrack; +} + +/* Netfilter hook itself. */ +unsigned int ip_conntrack_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + struct ip_conntrack_protocol *proto; + int set_reply; + int ret; + + /* FIXME: Do this right please. --RR */ + (*pskb)->nfcache |= NFC_UNKNOWN; + +/* Doesn't cover locally-generated broadcast, so not worth it. */ +#if 0 + /* Ignore broadcast: no `connection'. */ + if ((*pskb)->pkt_type == PACKET_BROADCAST) { + printk("Broadcast packet!\n"); + return NF_ACCEPT; + } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF)) + == htonl(0x000000FF)) { + printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n", + NIPQUAD((*pskb)->nh.iph->saddr), + NIPQUAD((*pskb)->nh.iph->daddr), + (*pskb)->sk, (*pskb)->pkt_type); + } +#endif + + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if ((*pskb)->nfct) + return NF_ACCEPT; + + /* Gather fragments. */ + if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { + *pskb = ip_ct_gather_frags(*pskb); + if (!*pskb) + return NF_STOLEN; + } + + proto = ip_ct_find_proto((*pskb)->nh.iph->protocol); + + /* It may be an icmp error... */ + if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP + && icmp_error_track(*pskb, &ctinfo, hooknum)) + return NF_ACCEPT; + + if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) + /* Not valid part of a connection */ + return NF_ACCEPT; + + if (IS_ERR(ct)) + /* Too stressed to deal. 
*/ + return NF_DROP; + + IP_NF_ASSERT((*pskb)->nfct); + + ret = proto->packet(ct, (*pskb)->nh.iph, (*pskb)->len, ctinfo); + if (ret == -1) { + /* Invalid */ + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; + return NF_ACCEPT; + } + + if (ret != NF_DROP && ct->helper) { + ret = ct->helper->help((*pskb)->nh.iph, (*pskb)->len, + ct, ctinfo); + if (ret == -1) { + /* Invalid */ + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; + return NF_ACCEPT; + } + } + if (set_reply) + set_bit(IPS_SEEN_REPLY_BIT, &ct->status); + + return ret; +} + +int invert_tuplepr(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig) +{ + return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum)); +} + +static inline int resent_expect(const struct ip_conntrack_expect *i, + const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *mask) +{ + DEBUGP("resent_expect\n"); + DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple); + DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple); + DEBUGP("test tuple: "); DUMP_TUPLE(tuple); + return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple)) + || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple))) + && ip_ct_tuple_equal(&i->mask, mask)); +} + +/* Would two expected things clash? */ +static inline int expect_clash(const struct ip_conntrack_expect *i, + const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *mask) +{ + /* Part covered by intersection of masks must be unequal, + otherwise they clash */ + struct ip_conntrack_tuple intersect_mask + = { { i->mask.src.ip & mask->src.ip, + { i->mask.src.u.all & mask->src.u.all } }, + { i->mask.dst.ip & mask->dst.ip, + { i->mask.dst.u.all & mask->dst.u.all }, + i->mask.dst.protonum & mask->dst.protonum } }; + + return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask); +} + +inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect) +{ + WRITE_LOCK(&ip_conntrack_lock); + unexpect_related(expect); + WRITE_UNLOCK(&ip_conntrack_lock); +} + +static void expectation_timed_out(unsigned long ul_expect) +{ + struct ip_conntrack_expect *expect = (void *) ul_expect; + + DEBUGP("expectation %p timed out\n", expect); + WRITE_LOCK(&ip_conntrack_lock); + __unexpect_related(expect); + WRITE_UNLOCK(&ip_conntrack_lock); +} + +/* Add a related connection. */ +int ip_conntrack_expect_related(struct ip_conntrack *related_to, + struct ip_conntrack_expect *expect) +{ + struct ip_conntrack_expect *old, *new; + int ret = 0; + + WRITE_LOCK(&ip_conntrack_lock); + /* Because of the write lock, no reader can walk the lists, + * so there is no need to use the tuple lock too */ + + DEBUGP("ip_conntrack_expect_related %p\n", related_to); + DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple); + DEBUGP("mask: "); DUMP_TUPLE(&expect->mask); + + old = LIST_FIND(&ip_conntrack_expect_list, resent_expect, + struct ip_conntrack_expect *, &expect->tuple, + &expect->mask); + if (old) { + /* Helper private data may contain offsets but no pointers + pointing into the payload - otherwise we should have to copy + the data filled out by the helper over the old one */ + DEBUGP("expect_related: resent packet\n"); + if (related_to->helper->timeout) { + if (!del_timer(&old->timeout)) { + /* expectation is dying. 
Fall through */ + old = NULL; + } else { + old->timeout.expires = jiffies + + related_to->helper->timeout * HZ; + add_timer(&old->timeout); + } + } + + if (old) { + WRITE_UNLOCK(&ip_conntrack_lock); + return -EEXIST; + } + } else if (related_to->helper->max_expected && + related_to->expecting >= related_to->helper->max_expected) { + /* old == NULL */ + if (!(related_to->helper->flags & + IP_CT_HELPER_F_REUSE_EXPECT)) { + WRITE_UNLOCK(&ip_conntrack_lock); + if (net_ratelimit()) + printk(KERN_WARNING + "ip_conntrack: max number of expected " + "connections %i of %s reached for " + "%u.%u.%u.%u->%u.%u.%u.%u\n", + related_to->helper->max_expected, + related_to->helper->name, + NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip), + NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip)); + return -EPERM; + } + DEBUGP("ip_conntrack: max number of expected " + "connections %i of %s reached for " + "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n", + related_to->helper->max_expected, + related_to->helper->name, + NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip), + NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip)); + + /* choose the the oldest expectation to evict */ + list_for_each_entry(old, &related_to->sibling_list, + expected_list) + if (old->sibling == NULL) + break; + + /* We cannot fail since related_to->expecting is the number + * of unconfirmed expectations */ + IP_NF_ASSERT(old && old->sibling == NULL); + + /* newnat14 does not reuse the real allocated memory + * structures but rather unexpects the old and + * allocates a new. unexpect_related will decrement + * related_to->expecting. + */ + unexpect_related(old); + ret = -EPERM; + } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash, + struct ip_conntrack_expect *, &expect->tuple, + &expect->mask)) { + WRITE_UNLOCK(&ip_conntrack_lock); + DEBUGP("expect_related: busy!\n"); + return -EBUSY; + } + + new = (struct ip_conntrack_expect *) + kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC); + if (!new) { + WRITE_UNLOCK(&ip_conntrack_lock); + DEBUGP("expect_relaed: OOM allocating expect\n"); + return -ENOMEM; + } + + DEBUGP("new expectation %p of conntrack %p\n", new, related_to); + memcpy(new, expect, sizeof(*expect)); + new->expectant = related_to; + new->sibling = NULL; + atomic_set(&new->use, 1); + + /* add to expected list for this connection */ + list_add_tail(&new->expected_list, &related_to->sibling_list); + /* add to global list of expectations */ + list_prepend(&ip_conntrack_expect_list, &new->list); + /* add and start timer if required */ + if (related_to->helper->timeout) { + init_timer(&new->timeout); + new->timeout.data = (unsigned long)new; + new->timeout.function = expectation_timed_out; + new->timeout.expires = jiffies + + related_to->helper->timeout * HZ; + add_timer(&new->timeout); + } + related_to->expecting++; + + WRITE_UNLOCK(&ip_conntrack_lock); + + return ret; +} + +/* Change tuple in an existing expectation */ +int ip_conntrack_change_expect(struct ip_conntrack_expect *expect, + struct ip_conntrack_tuple *newtuple) +{ + int ret; + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + WRITE_LOCK(&ip_conntrack_expect_tuple_lock); + + DEBUGP("change_expect:\n"); + DEBUGP("exp tuple: "); DUMP_TUPLE(&expect->tuple); + DEBUGP("exp mask: "); DUMP_TUPLE(&expect->mask); + DEBUGP("newtuple: "); DUMP_TUPLE(newtuple); + if (expect->ct_tuple.dst.protonum == 0) { + /* Never seen before */ + DEBUGP("change expect: never seen before\n"); + if (!ip_ct_tuple_equal(&expect->tuple, newtuple) + 
&& LIST_FIND(&ip_conntrack_expect_list, expect_clash, + struct ip_conntrack_expect *, newtuple, &expect->mask)) { + /* Force NAT to find an unused tuple */ + ret = -1; + } else { + memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple)); + memcpy(&expect->tuple, newtuple, sizeof(expect->tuple)); + ret = 0; + } + } else { + /* Resent packet */ + DEBUGP("change expect: resent packet\n"); + if (ip_ct_tuple_equal(&expect->tuple, newtuple)) { + ret = 0; + } else { + /* Force NAT to choose again the same port */ + ret = -1; + } + } + WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock); + + return ret; +} + +/* Alter reply tuple (maybe alter helper). If it's already taken, + return 0 and don't do alteration. */ +int ip_conntrack_alter_reply(struct ip_conntrack *conntrack, + const struct ip_conntrack_tuple *newreply) +{ + WRITE_LOCK(&ip_conntrack_lock); + if (__ip_conntrack_find(newreply, conntrack)) { + WRITE_UNLOCK(&ip_conntrack_lock); + return 0; + } + /* Should be unconfirmed, so not in hash table yet */ + IP_NF_ASSERT(!is_confirmed(conntrack)); + + DEBUGP("Altering reply tuple of %p to ", conntrack); + DUMP_TUPLE(newreply); + + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; + if (!conntrack->master && list_empty(&conntrack->sibling_list)) + conntrack->helper = ip_ct_find_helper(newreply); + WRITE_UNLOCK(&ip_conntrack_lock); + + return 1; +} + +int ip_conntrack_helper_register(struct ip_conntrack_helper *me) +{ + MOD_INC_USE_COUNT; + + WRITE_LOCK(&ip_conntrack_lock); + list_prepend(&helpers, me); + WRITE_UNLOCK(&ip_conntrack_lock); + + return 0; +} + +static inline int unhelp(struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_helper *me) +{ + if (i->ctrack->helper == me) { + /* Get rid of any expected. */ + remove_expectations(i->ctrack, 0); + /* And *then* set helper to NULL */ + i->ctrack->helper = NULL; + } + return 0; +} + +void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) +{ + unsigned int i; + + /* Need write lock here, to delete helper. */ + WRITE_LOCK(&ip_conntrack_lock); + LIST_DELETE(&helpers, me); + + /* Get rid of expecteds, set helpers to NULL. */ + for (i = 0; i < ip_conntrack_htable_size; i++) + LIST_FIND_W(&ip_conntrack_hash[i], unhelp, + struct ip_conntrack_tuple_hash *, me); + WRITE_UNLOCK(&ip_conntrack_lock); + + /* Someone could be still looking at the helper in a bh. */ + br_write_lock_bh(BR_NETPROTO_LOCK); + br_write_unlock_bh(BR_NETPROTO_LOCK); + + MOD_DEC_USE_COUNT; +} + +/* Refresh conntrack for this many jiffies. */ +void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies) +{ + IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); + + WRITE_LOCK(&ip_conntrack_lock); + /* If not in hash table, timer will not be active yet */ + if (!is_confirmed(ct)) + ct->timeout.expires = extra_jiffies; + else { + /* Need del_timer for race avoidance (may already be dying). 
*/ + if (del_timer(&ct->timeout)) { + ct->timeout.expires = jiffies + extra_jiffies; + add_timer(&ct->timeout); + } + } + WRITE_UNLOCK(&ip_conntrack_lock); +} + +/* Returns new sk_buff, or NULL */ +struct sk_buff * +ip_ct_gather_frags(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; +#ifdef CONFIG_NETFILTER_DEBUG + unsigned int olddebug = skb->nf_debug; +#endif + if (sk) { + sock_hold(sk); + skb_orphan(skb); + } + + local_bh_disable(); + skb = ip_defrag(skb); + local_bh_enable(); + + if (!skb) { + if (sk) sock_put(sk); + return skb; + } else if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) { + kfree_skb(skb); + if (sk) sock_put(sk); + return NULL; + } + + if (sk) { + skb_set_owner_w(skb, sk); + sock_put(sk); + } + + ip_send_check(skb->nh.iph); + skb->nfcache |= NFC_ALTERED; +#ifdef CONFIG_NETFILTER_DEBUG + /* Packet path as if nothing had happened. */ + skb->nf_debug = olddebug; +#endif + return skb; +} + +/* Used by ipt_REJECT. */ +static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + + ct = __ip_conntrack_get(nfct, &ctinfo); + + /* This ICMP is in reverse direction to the packet which + caused it */ + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) + ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; + else + ctinfo = IP_CT_RELATED; + + /* Attach new skbuff, and increment count */ + nskb->nfct = &ct->infos[ctinfo]; + atomic_inc(&ct->ct_general.use); +} + +static inline int +do_kill(const struct ip_conntrack_tuple_hash *i, + int (*kill)(const struct ip_conntrack *i, void *data), + void *data) +{ + return kill(i->ctrack, data); +} + +/* Bring out ya dead! */ +static struct ip_conntrack_tuple_hash * +get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data), + void *data, unsigned int *bucket) +{ + struct ip_conntrack_tuple_hash *h = NULL; + + READ_LOCK(&ip_conntrack_lock); + for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) { + h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill, + struct ip_conntrack_tuple_hash *, kill, data); + } + if (h) + atomic_inc(&h->ctrack->ct_general.use); + READ_UNLOCK(&ip_conntrack_lock); + + return h; +} + +void +ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data), + void *data) +{ + struct ip_conntrack_tuple_hash *h; + unsigned int bucket = 0; + + while ((h = get_next_corpse(kill, data, &bucket)) != NULL) { + /* Time to push up daisies... */ + if (del_timer(&h->ctrack->timeout)) + death_by_timeout((unsigned long)h->ctrack); + /* ... else the timer will get him soon. */ + + ip_conntrack_put(h->ctrack); + } +} + +/* Fast function for those who don't want to parse /proc (and I don't + blame them). */ +/* Reversing the socket's dst/src point of view gives us the reply + mapping. */ +static int +getorigdst(struct sock *sk, int optval, void *user, int *len) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_tuple tuple; + + IP_CT_TUPLE_U_BLANK(&tuple); + tuple.src.ip = sk->rcv_saddr; + tuple.src.u.tcp.port = sk->sport; + tuple.dst.ip = sk->daddr; + tuple.dst.u.tcp.port = sk->dport; + tuple.dst.protonum = IPPROTO_TCP; + + /* We only do TCP at the moment: is there a better way?
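getorigdst() here is the kernel side of the SO_ORIGINAL_DST socket option: a transparent proxy that accept()s a REDIRECTed TCP connection can ask conntrack for the destination the client originally dialled instead of parsing /proc/net/ip_conntrack. A minimal user-space sketch of the caller's side (error handling trimmed; the function name is made up for the example):

#include <stdio.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/netfilter_ipv4.h>	/* defines SO_ORIGINAL_DST */

/* fd is a TCP socket accept()ed by a transparent proxy */
static int print_original_dst(int fd)
{
	struct sockaddr_in orig;
	socklen_t len = sizeof(orig);

	/* answered by getorigdst() via the nf_sockopt hook below */
	if (getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &orig, &len) < 0)
		return -1;	/* not TCP, or no conntrack entry */

	printf("client originally wanted %s:%u\n",
	       inet_ntoa(orig.sin_addr), ntohs(orig.sin_port));
	return 0;
}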
*/ + if (strcmp(sk->prot->name, "TCP") != 0) { + DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); + return -ENOPROTOOPT; + } + + if ((unsigned int) *len < sizeof(struct sockaddr_in)) { + DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", + *len, sizeof(struct sockaddr_in)); + return -EINVAL; + } + + h = ip_conntrack_find_get(&tuple, NULL); + if (h) { + struct sockaddr_in sin; + + sin.sin_family = AF_INET; + sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.u.tcp.port; + sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.ip; + + DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", + NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); + ip_conntrack_put(h->ctrack); + if (copy_to_user(user, &sin, sizeof(sin)) != 0) + return -EFAULT; + else + return 0; + } + DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", + NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port), + NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port)); + return -ENOENT; +} + +static struct nf_sockopt_ops so_getorigdst += { { NULL, NULL }, PF_INET, + 0, 0, NULL, /* Setsockopts */ + SO_ORIGINAL_DST, SO_ORIGINAL_DST+1, &getorigdst, + 0, NULL }; + +static int kill_all(const struct ip_conntrack *i, void *data) +{ + return 1; +} + +/* Mishearing the voices in his head, our hero wonders how he's + supposed to kill the mall. */ +void ip_conntrack_cleanup(void) +{ + ip_ct_attach = NULL; + /* This makes sure all current packets have passed through + netfilter framework. Roll on, two-stage module + delete... */ + br_write_lock_bh(BR_NETPROTO_LOCK); + br_write_unlock_bh(BR_NETPROTO_LOCK); + + i_see_dead_people: + ip_ct_selective_cleanup(kill_all, NULL); + if (atomic_read(&ip_conntrack_count) != 0) { + schedule(); + goto i_see_dead_people; + } + + kmem_cache_destroy(ip_conntrack_cachep); + vfree(ip_conntrack_hash); + nf_unregister_sockopt(&so_getorigdst); +} + +static int hashsize = 0; +MODULE_PARM(hashsize, "i"); + +int __init ip_conntrack_init(void) +{ + unsigned int i; + int ret; + + /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB + * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ + if (hashsize) { + ip_conntrack_htable_size = hashsize; + } else { + ip_conntrack_htable_size + = (((num_physpages << PAGE_SHIFT) / 16384) + / sizeof(struct list_head)); + if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) + ip_conntrack_htable_size = 8192; + if (ip_conntrack_htable_size < 16) + ip_conntrack_htable_size = 16; + } + ip_conntrack_max = 8 * ip_conntrack_htable_size; + + printk("ip_conntrack version %s (%u buckets, %d max)" + " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION, + ip_conntrack_htable_size, ip_conntrack_max, + sizeof(struct ip_conntrack)); + + ret = nf_register_sockopt(&so_getorigdst); + if (ret != 0) { + printk(KERN_ERR "Unable to register netfilter socket option\n"); + return ret; + } + + ip_conntrack_hash = vmalloc(sizeof(struct list_head) + * ip_conntrack_htable_size); + if (!ip_conntrack_hash) { + printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); + goto err_unreg_sockopt; + } + + ip_conntrack_cachep = kmem_cache_create("ip_conntrack", + sizeof(struct ip_conntrack), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!ip_conntrack_cachep) { + printk(KERN_ERR "Unable to create ip_conntrack slab cache\n"); + goto err_free_hash; + } + /* Don't NEED lock here, but good form anyway. */ + WRITE_LOCK(&ip_conntrack_lock); + /* Sew in builtin protocols. 
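The memory-based sizing heuristic in ip_conntrack_init() above is easy to check by hand: a hypothetical i386 box with 64 MB of RAM gives 64 MB / 16384 = 4096 bytes of hash table, divided by the 8-byte struct list_head that is 512 buckets, and ip_conntrack_max = 8 * 512 = 4096 tracked connections (consistent with the "32MB machine has 256 buckets" comment). A stand-alone sketch of the same arithmetic:

#include <stdio.h>

/* User-space rerun of the bucket/limit heuristic from
 * ip_conntrack_init(); 64 MB RAM and an 8-byte list head
 * are assumptions for the example. */
int main(void)
{
	unsigned long mem_bytes = 64UL * 1024 * 1024;	/* hypothetical RAM */
	unsigned long list_head_size = 8;		/* two pointers on i386 */
	unsigned long buckets = (mem_bytes / 16384) / list_head_size;

	if (mem_bytes > 1024UL * 1024 * 1024)	/* > 1 GB is capped */
		buckets = 8192;
	if (buckets < 16)			/* floor, as in the kernel */
		buckets = 16;

	printf("%lu buckets, %lu conntrack max\n", buckets, 8 * buckets);
	return 0;
}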
*/ + list_append(&protocol_list, &ip_conntrack_protocol_tcp); + list_append(&protocol_list, &ip_conntrack_protocol_udp); + list_append(&protocol_list, &ip_conntrack_protocol_icmp); + WRITE_UNLOCK(&ip_conntrack_lock); + + for (i = 0; i < ip_conntrack_htable_size; i++) + INIT_LIST_HEAD(&ip_conntrack_hash[i]); + + /* For use by ipt_REJECT */ + ip_ct_attach = ip_conntrack_attach; + return ret; + +err_free_hash: + vfree(ip_conntrack_hash); +err_unreg_sockopt: + nf_unregister_sockopt(&so_getorigdst); + + return -ENOMEM; +} diff -uNr linux_org/net/ipv4/netfilter/ip_conntrack_pptp.c linux/net/ipv4/netfilter/ip_conntrack_pptp.c --- linux_org/net/ipv4/netfilter/ip_conntrack_pptp.c 1970-01-01 01:00:00.000000000 +0100 +++ linux/net/ipv4/netfilter/ip_conntrack_pptp.c 2006-10-27 14:11:52.000000000 +0200 @@ -0,0 +1,637 @@ +/* + * ip_conntrack_pptp.c - Version 1.9 + * + * Connection tracking support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637. + * + * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * Limitations: + * - We blindly assume that control connections are always + * established in PNS->PAC direction. This is a violation + * of RFC 2637. + * + * TODO: - finish support for multiple calls within one session + * (needs expect reservations in newnat) + * - testing of incoming PPTP calls + * + * Changes: + * 2002-02-05 - Version 1.3 + * - Call ip_conntrack_unexpect_related() from + * pptp_timeout_related() to destroy expectations in case + * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen + * (Philip Craig <philipc@snapgear.com>) + * - Add Version information at module loadtime + * 2002-02-10 - Version 1.6 + * - move to C99 style initializers + * - remove second expectation if first arrives + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/ip.h> +#include <net/checksum.h> +#include <net/tcp.h> + +#include <linux/netfilter_ipv4/lockhelp.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> + +#define IP_CT_PPTP_VERSION "1.9" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); + +DECLARE_LOCK(ip_pptp_lock); + +#if 0 +#include "ip_conntrack_pptp_priv.h" +#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \ + ": " format, ## args) +#else +#define DEBUGP(format, args...)
+#endif + +#define SECS *HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS +#define DAYS * 24 HOURS + +#define PPTP_GRE_TIMEOUT (10 MINS) +#define PPTP_GRE_STREAM_TIMEOUT (5 DAYS) + +static int pptp_expectfn(struct ip_conntrack *ct) +{ + struct ip_conntrack *master; + struct ip_conntrack_expect *exp; + + DEBUGP("increasing timeouts\n"); + /* increase timeout of GRE data channel conntrack entry */ + ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; + ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT; + + master = master_ct(ct); + if (!master) { + DEBUGP(" no master!!!\n"); + return 0; + } + + exp = ct->master; + if (!exp) { + DEBUGP("no expectation!!\n"); + return 0; + } + + DEBUGP("completing tuples with ct info\n"); + /* we can do this, since we're unconfirmed */ + if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == + htonl(master->help.ct_pptp_info.pac_call_id)) { + /* assume PNS->PAC */ + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key = + htonl(master->help.ct_pptp_info.pns_call_id); + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key = + htonl(master->help.ct_pptp_info.pns_call_id); + } else { + /* assume PAC->PNS */ + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key = + htonl(master->help.ct_pptp_info.pac_call_id); + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key = + htonl(master->help.ct_pptp_info.pac_call_id); + } + + /* delete other expectation */ + if (exp->expected_list.next != &exp->expected_list) { + struct ip_conntrack_expect *other_exp; + struct list_head *cur_item, *next; + + for (cur_item = master->sibling_list.next; + cur_item != &master->sibling_list; cur_item = next) { + next = cur_item->next; + other_exp = list_entry(cur_item, + struct ip_conntrack_expect, + expected_list); + /* remove only if occurred at same sequence number */ + if (other_exp != exp && other_exp->seq == exp->seq) { + DEBUGP("unexpecting other direction\n"); + ip_ct_gre_keymap_destroy(other_exp); + ip_conntrack_unexpect_related(other_exp); + } + } + } + + return 0; +} + +/* timeout GRE data connections */ +static int pptp_timeout_related(struct ip_conntrack *ct) +{ + struct list_head *cur_item, *next; + struct ip_conntrack_expect *exp; + + /* FIXME: do we have to lock something ? 
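One detail of pptp_expectfn() above (and of exp_gre() below) is worth spelling out: the 16-bit PPTP call ID does double duty as the 32-bit GRE tuple key. A call ID taken off the wire in network byte order is converted with htonl(ntohs(call_id)); one stored in host byte order, as in ip_ct_pptp_master, needs only htonl(). A user-space sketch of the two conversions (the call ID value 0x1234 is purely illustrative):

#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
	unsigned short wire_call_id = htons(0x1234);	/* as seen in the packet */
	unsigned short host_call_id = 0x1234;		/* as kept in ip_ct_pptp_master */

	/* both paths must yield the same 32-bit network-order tuple key */
	unsigned int key_from_wire = htonl(ntohs(wire_call_id));
	unsigned int key_from_host = htonl(host_call_id);

	printf("key 0x%08x == 0x%08x\n",
	       ntohl(key_from_wire), ntohl(key_from_host));
	return 0;
}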
*/ + for (cur_item = ct->sibling_list.next; + cur_item != &ct->sibling_list; cur_item = next) { + next = cur_item->next; + exp = list_entry(cur_item, struct ip_conntrack_expect, + expected_list); + + ip_ct_gre_keymap_destroy(exp); + if (!exp->sibling) { + ip_conntrack_unexpect_related(exp); + continue; + } + + DEBUGP("setting timeout of conntrack %p to 0\n", + exp->sibling); + exp->sibling->proto.gre.timeout = 0; + exp->sibling->proto.gre.stream_timeout = 0; + ip_ct_refresh(exp->sibling, 0); + } + + return 0; +} + +/* expect GRE connections (PNS->PAC and PAC->PNS direction) */ +static inline int +exp_gre(struct ip_conntrack *master, + u_int32_t seq, + u_int16_t callid, + u_int16_t peer_callid) +{ + struct ip_conntrack_expect exp; + struct ip_conntrack_tuple inv_tuple; + + memset(&exp, 0, sizeof(exp)); + /* tuple in original direction, PNS->PAC */ + exp.tuple.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + exp.tuple.src.u.gre.key = htonl(ntohs(peer_callid)); + exp.tuple.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + exp.tuple.dst.u.gre.key = htonl(ntohs(callid)); + exp.tuple.dst.u.gre.protocol = __constant_htons(GRE_PROTOCOL_PPTP); + exp.tuple.dst.u.gre.version = GRE_VERSION_PPTP; + exp.tuple.dst.protonum = IPPROTO_GRE; + + exp.mask.src.ip = 0xffffffff; + exp.mask.src.u.all = 0; + exp.mask.dst.u.all = 0; + exp.mask.dst.u.gre.key = 0xffffffff; + exp.mask.dst.u.gre.version = 0xff; + exp.mask.dst.u.gre.protocol = 0xffff; + exp.mask.dst.ip = 0xffffffff; + exp.mask.dst.protonum = 0xffff; + + exp.seq = seq; + exp.expectfn = pptp_expectfn; + + exp.help.exp_pptp_info.pac_call_id = ntohs(callid); + exp.help.exp_pptp_info.pns_call_id = ntohs(peer_callid); + + DEBUGP("calling expect_related "); + DUMP_TUPLE_RAW(&exp.tuple); + + /* Add GRE keymap entries */ + if (ip_ct_gre_keymap_add(&exp, &exp.tuple, 0) != 0) + return 1; + + invert_tuplepr(&inv_tuple, &exp.tuple); + if (ip_ct_gre_keymap_add(&exp, &inv_tuple, 1) != 0) { + ip_ct_gre_keymap_destroy(&exp); + return 1; + } + + if (ip_conntrack_expect_related(master, &exp) != 0) { + ip_ct_gre_keymap_destroy(&exp); + DEBUGP("cannot expect_related()\n"); + return 1; + } + + /* tuple in reply direction, PAC->PNS */ + exp.tuple.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + exp.tuple.src.u.gre.key = htonl(ntohs(callid)); + exp.tuple.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + exp.tuple.dst.u.gre.key = htonl(ntohs(peer_callid)); + + DEBUGP("calling expect_related "); + DUMP_TUPLE_RAW(&exp.tuple); + + /* Add GRE keymap entries */ + ip_ct_gre_keymap_add(&exp, &exp.tuple, 0); + invert_tuplepr(&inv_tuple, &exp.tuple); + ip_ct_gre_keymap_add(&exp, &inv_tuple, 1); + /* FIXME: cannot handle error correctly, since we need to free + * the above keymap :( */ + + if (ip_conntrack_expect_related(master, &exp) != 0) { + /* free the second pair of keymaps */ + ip_ct_gre_keymap_destroy(&exp); + DEBUGP("cannot expect_related()\n"); + return 1; + } + + return 0; +} + +static inline int +pptp_inbound_pkt(struct tcphdr *tcph, + struct pptp_pkt_hdr *pptph, + size_t datalen, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct PptpControlHeader *ctlh; + union pptp_ctrl_union pptpReq; + + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + u_int16_t msg, *cid, *pcid; + u_int32_t seq; + + ctlh = (struct PptpControlHeader *) + ((char *) pptph + sizeof(struct pptp_pkt_hdr)); + pptpReq.rawreq = (void *) + ((char *) ctlh + sizeof(struct PptpControlHeader)); + + msg = ntohs(ctlh->messageType); +
DEBUGP("inbound control message %s\n", strMName[msg]); + + switch (msg) { + case PPTP_START_SESSION_REPLY: + /* server confirms new control session */ + if (info->sstate < PPTP_SESSION_REQUESTED) { + DEBUGP("%s without START_SESS_REQUEST\n", + strMName[msg]); + break; + } + if (pptpReq.srep->resultCode == PPTP_START_OK) + info->sstate = PPTP_SESSION_CONFIRMED; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_STOP_SESSION_REPLY: + /* server confirms end of control session */ + if (info->sstate > PPTP_SESSION_STOPREQ) { + DEBUGP("%s without STOP_SESS_REQUEST\n", + strMName[msg]); + break; + } + if (pptpReq.strep->resultCode == PPTP_STOP_OK) + info->sstate = PPTP_SESSION_NONE; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_OUT_CALL_REPLY: + /* server accepted call, we now expect GRE frames */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", strMName[msg]); + break; + } + if (info->cstate != PPTP_CALL_OUT_REQ && + info->cstate != PPTP_CALL_OUT_CONF) { + DEBUGP("%s without OUTCALL_REQ\n", strMName[msg]); + break; + } + if (pptpReq.ocack->resultCode != PPTP_OUTCALL_CONNECT) { + info->cstate = PPTP_CALL_NONE; + break; + } + + cid = &pptpReq.ocack->callID; + pcid = &pptpReq.ocack->peersCallID; + + info->pac_call_id = ntohs(*cid); + + if (htons(info->pns_call_id) != *pcid) { + DEBUGP("%s for unknown callid %u\n", + strMName[msg], ntohs(*pcid)); + break; + } + + DEBUGP("%s, CID=%X, PCID=%X\n", strMName[msg], + ntohs(*cid), ntohs(*pcid)); + + info->cstate = PPTP_CALL_OUT_CONF; + + seq = ntohl(tcph->seq) + ((void *)pcid - (void *)pptph); + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + break; + + case PPTP_IN_CALL_REQUEST: + /* server tells us about incoming call request */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", strMName[msg]); + break; + } + pcid = &pptpReq.icack->peersCallID; + DEBUGP("%s, PCID=%X\n", strMName[msg], ntohs(*pcid)); + info->cstate = PPTP_CALL_IN_REQ; + info->pac_call_id= ntohs(*pcid); + break; + + case PPTP_IN_CALL_CONNECT: + /* server tells us about incoming call established */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", strMName[msg]); + break; + } + if (info->sstate != PPTP_CALL_IN_REP + && info->sstate != PPTP_CALL_IN_CONF) { + DEBUGP("%s but never sent IN_CALL_REPLY\n", + strMName[msg]); + break; + } + + pcid = &pptpReq.iccon->peersCallID; + cid = &info->pac_call_id; + + if (info->pns_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown CallID %u\n", + strMName[msg], ntohs(*cid)); + break; + } + + DEBUGP("%s, PCID=%X\n", strMName[msg], ntohs(*pcid)); + info->cstate = PPTP_CALL_IN_CONF; + + /* we expect a GRE connection from PAC to PNS */ + seq = ntohl(tcph->seq) + ((void *)pcid - (void *)pptph); + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + + break; + + case PPTP_CALL_DISCONNECT_NOTIFY: + /* server confirms disconnect */ + cid = &pptpReq.disc->callID; + DEBUGP("%s, CID=%X\n", strMName[msg], ntohs(*cid)); + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_timeout_related(ct); + break; + + case PPTP_WAN_ERROR_NOTIFY: + break; + + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + default: + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) + ? 
strMName[msg]:strMName[0], msg); + break; + } + + return NF_ACCEPT; + +} + +static inline int +pptp_outbound_pkt(struct tcphdr *tcph, + struct pptp_pkt_hdr *pptph, + size_t datalen, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct PptpControlHeader *ctlh; + union pptp_ctrl_union pptpReq; + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + u_int16_t msg, *cid, *pcid; + + ctlh = (struct PptpControlHeader *) ((void *) pptph + sizeof(*pptph)); + pptpReq.rawreq = (void *) ((void *) ctlh + sizeof(*ctlh)); + + msg = ntohs(ctlh->messageType); + DEBUGP("outbound control message %s\n", strMName[msg]); + + switch (msg) { + case PPTP_START_SESSION_REQUEST: + /* client requests a new control session */ + if (info->sstate != PPTP_SESSION_NONE) { + DEBUGP("%s but we already have one\n", + strMName[msg]); + } + info->sstate = PPTP_SESSION_REQUESTED; + break; + case PPTP_STOP_SESSION_REQUEST: + /* client requests end of control session */ + info->sstate = PPTP_SESSION_STOPREQ; + break; + + case PPTP_OUT_CALL_REQUEST: + /* client initiating connection to server */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", + strMName[msg]); + break; + } + info->cstate = PPTP_CALL_OUT_REQ; + /* track PNS call id */ + cid = &pptpReq.ocreq->callID; + DEBUGP("%s, CID=%X\n", strMName[msg], ntohs(*cid)); + info->pns_call_id = ntohs(*cid); + break; + case PPTP_IN_CALL_REPLY: + /* client answers incoming call */ + if (info->cstate != PPTP_CALL_IN_REQ + && info->cstate != PPTP_CALL_IN_REP) { + DEBUGP("%s without incall_req\n", + strMName[msg]); + break; + } + if (pptpReq.icack->resultCode != PPTP_INCALL_ACCEPT) { + info->cstate = PPTP_CALL_NONE; + break; + } + pcid = &pptpReq.icack->peersCallID; + if (info->pac_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown call %u\n", + strMName[msg], ntohs(*pcid)); + break; + } + DEBUGP("%s, CID=%X\n", strMName[msg], ntohs(*pcid)); + /* part two of the three-way handshake */ + info->cstate = PPTP_CALL_IN_REP; + info->pns_call_id = ntohs(pptpReq.icack->callID); + break; + + case PPTP_CALL_CLEAR_REQUEST: + /* client requests hangup of call */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("CLEAR_CALL but no session\n"); + break; + } + /* FUTURE: iterate over all calls and check if + * call ID is valid. We don't do this without newnat, + * because we only know about the last call */ + info->cstate = PPTP_CALL_CLEAR_REQ; + break; + case PPTP_SET_LINK_INFO: + break; + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + default: + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? + strMName[msg]:strMName[0], msg); + /* unknown: no need to create GRE masq table entry */ + break; + } + + return NF_ACCEPT; +} + + +/* track caller id inside control connection, call expect_related */ +static int +conntrack_pptp_help(const struct iphdr *iph, size_t len, + struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) + +{ + struct pptp_pkt_hdr *pptph; + + struct tcphdr *tcph = (void *) iph + iph->ihl * 4; + u_int32_t tcplen = len - iph->ihl * 4; + u_int32_t datalen = tcplen - tcph->doff * 4; + void *datalimit; + int dir = CTINFO2DIR(ctinfo); + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + + int oldsstate, oldcstate; + int ret; + + /* don't do any tracking before tcp handshake complete */ + if (ctinfo != IP_CT_ESTABLISHED + && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { + DEBUGP("ctinfo = %u, skipping\n", ctinfo); + return NF_ACCEPT; + } + + /* not a complete TCP header?
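The sanity checks that conntrack_pptp_help() performs next follow the wire format layer by layer: IP header, TCP header, fixed pptp_pkt_hdr, PptpControlHeader, then the message-specific body. The same validation, reduced to a stand-alone user-space sketch (struct layouts copied from the ip_conntrack_pptp.h added by this patch; buf and len are hypothetical inputs):

#include <string.h>
#include <arpa/inet.h>		/* ntohs/ntohl */

/* layouts as in ip_conntrack_pptp.h */
struct pptp_pkt_hdr {
	unsigned short packetLength;
	unsigned short packetType;
	unsigned int   magicCookie;
};
struct PptpControlHeader {
	unsigned short messageType;
	unsigned short reserved;
};

#define PPTP_PACKET_CONTROL 1
#define PPTP_MAGIC_COOKIE   0x1a2b3c4d

/* returns the control message type, or -1 if this is not a
 * PPTP control message we could track */
static int pptp_msg_type(const unsigned char *buf, size_t len)
{
	struct pptp_pkt_hdr ph;
	struct PptpControlHeader ch;

	if (len < sizeof(ph) + sizeof(ch))
		return -1;		/* no full PPTP header */

	memcpy(&ph, buf, sizeof(ph));
	if (ntohs(ph.packetType) != PPTP_PACKET_CONTROL ||
	    ntohl(ph.magicCookie) != PPTP_MAGIC_COOKIE)
		return -1;		/* not a control packet */

	memcpy(&ch, buf + sizeof(ph), sizeof(ch));
	return ntohs(ch.messageType);
}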
*/ + if (tcplen < sizeof(struct tcphdr) || tcplen < tcph->doff * 4) { + DEBUGP("tcplen = %u\n", tcplen); + return NF_ACCEPT; + } + + /* checksum invalid? */ + if (tcp_v4_check(tcph, tcplen, iph->saddr, iph->daddr, + csum_partial((char *) tcph, tcplen, 0))) { + printk(KERN_NOTICE __FILE__ ": bad csum\n"); + /* W2K PPTP server sends TCP packets with wrong checksum :(( */ + //return NF_ACCEPT; + } + + if (tcph->fin || tcph->rst) { + DEBUGP("RST/FIN received, timeouting GRE\n"); + /* can't do this after real newnat */ + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_timeout_related(ct); + } + + + pptph = (struct pptp_pkt_hdr *) ((void *) tcph + tcph->doff * 4); + datalimit = (void *) pptph + datalen; + + /* not a full pptp packet header? */ + if ((void *) pptph+sizeof(*pptph) >= datalimit) { + DEBUGP("no full PPTP header, can't track\n"); + return NF_ACCEPT; + } + + /* if it's not a control message we can't do anything with it */ + if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || + ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { + DEBUGP("not a control packet\n"); + return NF_ACCEPT; + } + + oldsstate = info->sstate; + oldcstate = info->cstate; + + LOCK_BH(&ip_pptp_lock); + + /* FIXME: We just blindly assume that the control connection is always + * established from PNS->PAC. However, RFC makes no guarantee */ + if (dir == IP_CT_DIR_ORIGINAL) + /* client -> server (PNS -> PAC) */ + ret = pptp_outbound_pkt(tcph, pptph, datalen, ct, ctinfo); + else + /* server -> client (PAC -> PNS) */ + ret = pptp_inbound_pkt(tcph, pptph, datalen, ct, ctinfo); + DEBUGP("sstate: %d->%d, cstate: %d->%d\n", + oldsstate, info->sstate, oldcstate, info->cstate); + UNLOCK_BH(&ip_pptp_lock); + + return ret; +} + +/* control protocol helper */ +static struct ip_conntrack_helper pptp = { + .list = { NULL, NULL }, + .name = "pptp", + .flags = IP_CT_HELPER_F_REUSE_EXPECT, + .me = THIS_MODULE, + .max_expected = 2, + .timeout = 0, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = IPPROTO_TCP + } + }, + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = 0xffff } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = 0xffff + } + }, + .help = conntrack_pptp_help +}; + +/* ip_conntrack_pptp initialization */ +static int __init init(void) +{ + int retcode; + + DEBUGP(__FILE__ ": registering helper\n"); + if ((retcode = ip_conntrack_helper_register(&pptp))) { + printk(KERN_ERR "Unable to register conntrack application " + "helper for pptp: %d\n", retcode); + return -EIO; + } + + printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION); + return 0; +} + +static void __exit fini(void) +{ + ip_conntrack_helper_unregister(&pptp); + printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION); +} + +module_init(init); +module_exit(fini); + +EXPORT_SYMBOL(ip_pptp_lock); diff -uNr linux_org/net/ipv4/netfilter/ip_conntrack_pptp_priv.h linux/net/ipv4/netfilter/ip_conntrack_pptp_priv.h --- linux_org/net/ipv4/netfilter/ip_conntrack_pptp_priv.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/net/ipv4/netfilter/ip_conntrack_pptp_priv.h 2006-10-27 14:11:52.000000000 +0200 @@ -0,0 +1,24 @@ +#ifndef _IP_CT_PPTP_PRIV_H +#define _IP_CT_PPTP_PRIV_H + +/* PptpControlMessageType names */ +static const char *strMName[] = { + "UNKNOWN_MESSAGE", + "START_SESSION_REQUEST", + "START_SESSION_REPLY", + "STOP_SESSION_REQUEST", + "STOP_SESSION_REPLY", + "ECHO_REQUEST", + 
"ECHO_REPLY", + "OUT_CALL_REQUEST", + "OUT_CALL_REPLY", + "IN_CALL_REQUEST", + "IN_CALL_REPLY", + "IN_CALL_CONNECT", + "CALL_CLEAR_REQUEST", + "CALL_DISCONNECT_NOTIFY", + "WAN_ERROR_NOTIFY", + "SET_LINK_INFO" +}; + +#endif diff -uNr linux_org/net/ipv4/netfilter/ip_conntrack_proto_gre.c linux/net/ipv4/netfilter/ip_conntrack_proto_gre.c --- linux_org/net/ipv4/netfilter/ip_conntrack_proto_gre.c 1970-01-01 01:00:00.000000000 +0100 +++ linux/net/ipv4/netfilter/ip_conntrack_proto_gre.c 2006-10-27 14:11:52.000000000 +0200 @@ -0,0 +1,343 @@ +/* + * ip_conntrack_proto_gre.c - Version 1.2 + * + * Connection tracking protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/netfilter.h> +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/list.h> + +#include <linux/netfilter_ipv4/lockhelp.h> + +DECLARE_RWLOCK(ip_ct_gre_lock); +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_ct_gre_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_ct_gre_lock) + +#include <linux/netfilter_ipv4/listhelp.h> +#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> + +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); + +/* shamelessly stolen from ip_conntrack_proto_udp.c */ +#define GRE_TIMEOUT (30*HZ) +#define GRE_STREAM_TIMEOUT (180*HZ) + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \ + ": " format, ## args) +#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x:%u:0x%x\n", \ + NIPQUAD((x)->src.ip), ntohl((x)->src.u.gre.key), \ + NIPQUAD((x)->dst.ip), ntohl((x)->dst.u.gre.key), \ + (x)->dst.u.gre.version, \ + ntohs((x)->dst.u.gre.protocol)) +#else +#define DEBUGP(x, args...) 
+#define DUMP_TUPLE_GRE(x) +#endif + +/* GRE KEYMAP HANDLING FUNCTIONS */ +static LIST_HEAD(gre_keymap_list); + +static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km, + const struct ip_conntrack_tuple *t) +{ + return ((km->tuple.src.ip == t->src.ip) && + (km->tuple.dst.ip == t->dst.ip) && + (km->tuple.dst.protonum == t->dst.protonum) && + (km->tuple.dst.u.all == t->dst.u.all)); +} + +/* look up the source key for a given tuple */ +static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t) +{ + struct ip_ct_gre_keymap *km; + u_int32_t key; + + READ_LOCK(&ip_ct_gre_lock); + km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (!km) { + READ_UNLOCK(&ip_ct_gre_lock); + return 0; + } + + key = km->tuple.src.u.gre.key; + READ_UNLOCK(&ip_ct_gre_lock); + + return key; +} + +/* add a single keymap entry, associate with specified expect */ +int ip_ct_gre_keymap_add(struct ip_conntrack_expect *exp, + struct ip_conntrack_tuple *t, int reply) +{ + struct ip_ct_gre_keymap *km; + + km = kmalloc(sizeof(*km), GFP_ATOMIC); + if (!km) + return -1; + + /* initializing list head should be sufficient */ + memset(km, 0, sizeof(*km)); + + memcpy(&km->tuple, t, sizeof(*t)); + + if (!reply) + exp->proto.gre.keymap_orig = km; + else + exp->proto.gre.keymap_reply = km; + + DEBUGP("adding new entry %p: ", km); + DUMP_TUPLE_GRE(&km->tuple); + + WRITE_LOCK(&ip_ct_gre_lock); + list_append(&gre_keymap_list, km); + WRITE_UNLOCK(&ip_ct_gre_lock); + + return 0; +} + +/* change the tuple of a keymap entry (used by nat helper) */ +void ip_ct_gre_keymap_change(struct ip_ct_gre_keymap *km, + struct ip_conntrack_tuple *t) +{ + DEBUGP("changing entry %p to: ", km); + DUMP_TUPLE_GRE(t); + + WRITE_LOCK(&ip_ct_gre_lock); + memcpy(&km->tuple, t, sizeof(km->tuple)); + WRITE_UNLOCK(&ip_ct_gre_lock); +} + +/* destroy the keymap entries associated with specified expect */ +void ip_ct_gre_keymap_destroy(struct ip_conntrack_expect *exp) +{ + DEBUGP("entering for exp %p\n", exp); + WRITE_LOCK(&ip_ct_gre_lock); + if (exp->proto.gre.keymap_orig) { + DEBUGP("removing %p from list\n", exp->proto.gre.keymap_orig); + list_del(&exp->proto.gre.keymap_orig->list); + kfree(exp->proto.gre.keymap_orig); + exp->proto.gre.keymap_orig = NULL; + } + if (exp->proto.gre.keymap_reply) { + DEBUGP("removing %p from list\n", exp->proto.gre.keymap_reply); + list_del(&exp->proto.gre.keymap_reply->list); + kfree(exp->proto.gre.keymap_reply); + exp->proto.gre.keymap_reply = NULL; + } + WRITE_UNLOCK(&ip_ct_gre_lock); +} + + +/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ + +/* invert gre part of tuple */ +static int gre_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + tuple->dst.u.gre.protocol = orig->dst.u.gre.protocol; + tuple->dst.u.gre.version = orig->dst.u.gre.version; + + tuple->dst.u.gre.key = orig->src.u.gre.key; + tuple->src.u.gre.key = orig->dst.u.gre.key; + + return 1; +} + +/* gre hdr info to tuple */ +static int gre_pkt_to_tuple(const void *datah, size_t datalen, + struct ip_conntrack_tuple *tuple) +{ + struct gre_hdr *grehdr = (struct gre_hdr *) datah; + struct gre_hdr_pptp *pgrehdr = (struct gre_hdr_pptp *) datah; + u_int32_t srckey; + + /* core guarantees 8 protocol bytes, no need for size check */ + + tuple->dst.u.gre.version = grehdr->version; + tuple->dst.u.gre.protocol = grehdr->protocol; + + switch (grehdr->version) { + case GRE_VERSION_1701: + if (!grehdr->key) { + DEBUGP("Can't track GRE without key\n"); + return 0; + } + tuple->dst.u.gre.key = 
*(gre_key(grehdr)); + break; + + case GRE_VERSION_PPTP: + if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { + DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); + return 0; + } + tuple->dst.u.gre.key = htonl(ntohs(pgrehdr->call_id)); + break; + + default: + printk(KERN_WARNING "unknown GRE version %hu\n", + tuple->dst.u.gre.version); + return 0; + } + + srckey = gre_keymap_lookup(tuple); + +#if 0 + DEBUGP("found src key %x for tuple ", ntohl(srckey)); + DUMP_TUPLE_GRE(tuple); +#endif + tuple->src.u.gre.key = srckey; + + return 1; +} + +/* print gre part of tuple */ +static unsigned int gre_print_tuple(char *buffer, + const struct ip_conntrack_tuple *tuple) +{ + return sprintf(buffer, "version=%d protocol=0x%04x srckey=0x%x dstkey=0x%x ", + tuple->dst.u.gre.version, + ntohs(tuple->dst.u.gre.protocol), + ntohl(tuple->src.u.gre.key), + ntohl(tuple->dst.u.gre.key)); +} + +/* print private data for conntrack */ +static unsigned int gre_print_conntrack(char *buffer, + const struct ip_conntrack *ct) +{ + return sprintf(buffer, "timeout=%u, stream_timeout=%u ", + (ct->proto.gre.timeout / HZ), + (ct->proto.gre.stream_timeout / HZ)); +} + +/* Returns verdict for packet, and may modify conntrack */ +static int gre_packet(struct ip_conntrack *ct, + struct iphdr *iph, size_t len, + enum ip_conntrack_info conntrackinfo) +{ + /* If we've seen traffic both ways, this is a GRE connection. + * Extend timeout. */ + if (ct->status & IPS_SEEN_REPLY) { + ip_ct_refresh(ct, ct->proto.gre.stream_timeout); + /* Also, more likely to be important, and not a probe. */ + set_bit(IPS_ASSURED_BIT, &ct->status); + } else + ip_ct_refresh(ct, ct->proto.gre.timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int gre_new(struct ip_conntrack *ct, + struct iphdr *iph, size_t len) +{ + DEBUGP(": "); + DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + + /* initialize to sane value. Ideally a conntrack helper + * (e.g. 
in case of pptp) is increasing them */ + ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT; + ct->proto.gre.timeout = GRE_TIMEOUT; + + return 1; +} + +/* Called when a conntrack entry has already been removed from the hashes + * and is about to be deleted from memory */ +static void gre_destroy(struct ip_conntrack *ct) +{ + struct ip_conntrack_expect *master = ct->master; + + DEBUGP(" entering\n"); + + if (!master) { + DEBUGP("no master exp for ct %p\n", ct); + return; + } + + ip_ct_gre_keymap_destroy(master); +} + +/* protocol helper struct */ +static struct ip_conntrack_protocol gre = { { NULL, NULL }, IPPROTO_GRE, + "gre", + gre_pkt_to_tuple, + gre_invert_tuple, + gre_print_tuple, + gre_print_conntrack, + gre_packet, + gre_new, + gre_destroy, + NULL, + THIS_MODULE }; + +/* ip_conntrack_proto_gre initialization */ +static int __init init(void) +{ + int retcode; + + if ((retcode = ip_conntrack_protocol_register(&gre))) { + printk(KERN_ERR "Unable to register conntrack protocol " + "helper for gre: %d\n", retcode); + return -EIO; + } + + return 0; +} + +static void __exit fini(void) +{ + struct list_head *pos, *n; + + /* delete all keymap entries */ + WRITE_LOCK(&ip_ct_gre_lock); + list_for_each_safe(pos, n, &gre_keymap_list) { + DEBUGP("deleting keymap %p at module unload time\n", pos); + list_del(pos); + kfree(pos); + } + WRITE_UNLOCK(&ip_ct_gre_lock); + + ip_conntrack_protocol_unregister(&gre); +} + +EXPORT_SYMBOL(ip_ct_gre_keymap_add); +EXPORT_SYMBOL(ip_ct_gre_keymap_change); +EXPORT_SYMBOL(ip_ct_gre_keymap_destroy); + +module_init(init); +module_exit(fini); diff -uNr linux_org/net/ipv4/netfilter/ip_nat_core.c linux/net/ipv4/netfilter/ip_nat_core.c --- linux_org/net/ipv4/netfilter/ip_nat_core.c 2004-11-24 12:14:04.000000000 +0100 +++ linux/net/ipv4/netfilter/ip_nat_core.c 2006-10-27 14:11:52.000000000 +0200 @@ -430,7 +430,7 @@ *tuple = *orig_tuple; while ((rptr = find_best_ips_proto_fast(tuple, mr, conntrack, hooknum)) != NULL) { - DEBUGP("Found best for "); DUMP_TUPLE(tuple); + DEBUGP("Found best for "); DUMP_TUPLE_RAW(tuple); /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. */ @@ -572,9 +572,9 @@ HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST", conntrack); DEBUGP("Original: "); - DUMP_TUPLE(&orig_tp); + DUMP_TUPLE_RAW(&orig_tp); DEBUGP("New: "); - DUMP_TUPLE(&new_tuple); + DUMP_TUPLE_RAW(&new_tuple); #endif /* We now have two tuples (SRCIP/SRCPT/DSTIP/DSTPT): diff -uNr linux_org/net/ipv4/netfilter/ip_nat_core.c.orig linux/net/ipv4/netfilter/ip_nat_core.c.orig --- linux_org/net/ipv4/netfilter/ip_nat_core.c.orig 1970-01-01 01:00:00.000000000 +0100 +++ linux/net/ipv4/netfilter/ip_nat_core.c.orig 2004-11-24 12:14:04.000000000 +0100 @@ -0,0 +1,1014 @@ +/* NAT for netfilter; shared with compatibility layer. */ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + Public Licence. 
*/ +#include <linux/version.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/skbuff.h> +#include <linux/netfilter_ipv4.h> +#include <linux/brlock.h> +#include <linux/vmalloc.h> +#include <net/checksum.h> +#include <net/icmp.h> +#include <net/ip.h> +#include <net/tcp.h> /* For tcp_prot in getorigdst */ + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) + +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> +#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> +#include <linux/netfilter_ipv4/ip_nat.h> +#include <linux/netfilter_ipv4/ip_nat_protocol.h> +#include <linux/netfilter_ipv4/ip_nat_core.h> +#include <linux/netfilter_ipv4/ip_nat_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/listhelp.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +DECLARE_RWLOCK(ip_nat_lock); +DECLARE_RWLOCK_EXTERN(ip_conntrack_lock); + +/* Calculated at init based on memory size */ +static unsigned int ip_nat_htable_size; + +static struct list_head *bysource; +static struct list_head *byipsproto; +LIST_HEAD(protos); +LIST_HEAD(helpers); + +extern struct ip_nat_protocol unknown_nat_protocol; + +/* We keep extra hashes for each conntrack, for fast searching. */ +static inline size_t +hash_by_ipsproto(u_int32_t src, u_int32_t dst, u_int16_t proto) +{ + /* Modified src and dst, to ensure we don't create two + identical streams. */ + return (src + dst + proto) % ip_nat_htable_size; +} + +static inline size_t +hash_by_src(const struct ip_conntrack_manip *manip, u_int16_t proto) +{ + /* Original src, to ensure we map it consistently if poss. */ + return (manip->ip + manip->u.all + proto) % ip_nat_htable_size; +} + +/* Noone using conntrack by the time this called. */ +static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) +{ + struct ip_nat_info *info = &conn->nat.info; + unsigned int hs, hp; + + if (!info->initialized) + return; + + IP_NF_ASSERT(info->bysource.conntrack); + IP_NF_ASSERT(info->byipsproto.conntrack); + + hs = hash_by_src(&conn->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src, + conn->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + + hp = hash_by_ipsproto(conn->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, + conn->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, + conn->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.protonum); + + WRITE_LOCK(&ip_nat_lock); + LIST_DELETE(&bysource[hs], &info->bysource); + LIST_DELETE(&byipsproto[hp], &info->byipsproto); + WRITE_UNLOCK(&ip_nat_lock); +} + +/* We do checksum mangling, so if they were wrong before they're still + * wrong. Also works for incomplete packets (eg. ICMP dest + * unreachables.) */ +u_int16_t +ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) +{ + u_int32_t diffs[] = { oldvalinv, newval }; + return csum_fold(csum_partial((char *)diffs, sizeof(diffs), + oldcheck^0xFFFF)); +} + +static inline int cmp_proto(const struct ip_nat_protocol *i, int proto) +{ + return i->protonum == proto; +} + +struct ip_nat_protocol * +find_nat_proto(u_int16_t protonum) +{ + struct ip_nat_protocol *i; + + MUST_BE_READ_LOCKED(&ip_nat_lock); + i = LIST_FIND(&protos, cmp_proto, struct ip_nat_protocol *, protonum); + if (!i) + i = &unknown_nat_protocol; + return i; +} + +/* Is this tuple already taken? 
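ip_nat_used_tuple(), defined next, encodes a subtle point that is easy to miss: conntrack hashes only the tuples of packets it expects to receive, so a proposed outgoing mapping is checked by inverting it into the reply that would come back and looking that up instead. A toy stand-alone illustration of the inversion (the struct is a simplified stand-in, all addresses and ports hypothetical):

#include <stdio.h>

/* simplified stand-in for ip_conntrack_tuple, just enough to show
 * how an outgoing tuple becomes the reply-direction lookup key */
struct mini_tuple { const char *sip; int sport; const char *dip; int dport; };

static struct mini_tuple invert(struct mini_tuple t)
{
	struct mini_tuple r = { t.dip, t.dport, t.sip, t.sport };
	return r;
}

int main(void)
{
	/* candidate SNAT mapping: 1.2.3.4:40000 -> 5.6.7.8:80 */
	struct mini_tuple out = { "1.2.3.4", 40000, "5.6.7.8", 80 };
	struct mini_tuple rep = invert(out);

	/* conntrack is asked about the reply direction */
	printf("lookup reply tuple %s:%d -> %s:%d\n",
	       rep.sip, rep.sport, rep.dip, rep.dport);
	return 0;
}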
(not by us) */ +int +ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + /* Conntrack tracking doesn't keep track of outgoing tuples; only + incoming ones. NAT means they don't have a fixed mapping, + so we invert the tuple and look for the incoming reply. + + We could keep a separate hash if this proves too slow. */ + struct ip_conntrack_tuple reply; + + invert_tuplepr(&reply, tuple); + return ip_conntrack_tuple_taken(&reply, ignored_conntrack); +} + +/* Does tuple + the source manip come within the range mr */ +static int +in_range(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_manip *manip, + const struct ip_nat_multi_range *mr) +{ + struct ip_nat_protocol *proto = find_nat_proto(tuple->dst.protonum); + unsigned int i; + struct ip_conntrack_tuple newtuple = { *manip, tuple->dst }; + + for (i = 0; i < mr->rangesize; i++) { + /* If we are allowed to map IPs, then we must be in the + range specified, otherwise we must be unchanged. */ + if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) { + if (ntohl(newtuple.src.ip) < ntohl(mr->range[i].min_ip) + || (ntohl(newtuple.src.ip) + > ntohl(mr->range[i].max_ip))) + continue; + } else { + if (newtuple.src.ip != tuple->src.ip) + continue; + } + + if ((mr->range[i].flags & IP_NAT_RANGE_PROTO_SPECIFIED) + && proto->in_range(&newtuple, IP_NAT_MANIP_SRC, + &mr->range[i].min, &mr->range[i].max)) + return 1; + } + return 0; +} + +static inline int +src_cmp(const struct ip_nat_hash *i, + const struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr) +{ + return (i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum + == tuple->dst.protonum + && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip + == tuple->src.ip + && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all + == tuple->src.u.all + && in_range(tuple, + &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src, + mr)); +} + +/* Only called for SRC manip */ +static struct ip_conntrack_manip * +find_appropriate_src(const struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr) +{ + unsigned int h = hash_by_src(&tuple->src, tuple->dst.protonum); + struct ip_nat_hash *i; + + MUST_BE_READ_LOCKED(&ip_nat_lock); + i = LIST_FIND(&bysource[h], src_cmp, struct ip_nat_hash *, tuple, mr); + if (i) + return &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src; + else + return NULL; +} + +#ifdef CONFIG_IP_NF_NAT_LOCAL +/* If it's really a local destination manip, it may need to do a + source manip too. */ +static int +do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp) +{ + struct rtable *rt; + + /* FIXME: IPTOS_TOS(iph->tos) --RR */ + if (ip_route_output(&rt, var_ip, 0, 0, 0) != 0) { + DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n", + NIPQUAD(var_ip)); + return 0; + } + + *other_ipp = rt->rt_src; + ip_rt_put(rt); + return 1; +} +#endif + +/* Simple way to iterate through all. */ +static inline int fake_cmp(const struct ip_nat_hash *i, + u_int32_t src, u_int32_t dst, u_int16_t protonum, + unsigned int *score, + const struct ip_conntrack *conntrack) +{ + /* Compare backwards: we're dealing with OUTGOING tuples, and + inside the conntrack is the REPLY tuple. Don't count this + conntrack. 
*/ + if (i->conntrack != conntrack + && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip == dst + && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip == src + && (i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum + == protonum)) + (*score)++; + return 0; +} + +static inline unsigned int +count_maps(u_int32_t src, u_int32_t dst, u_int16_t protonum, + const struct ip_conntrack *conntrack) +{ + unsigned int score = 0; + unsigned int h; + + MUST_BE_READ_LOCKED(&ip_nat_lock); + h = hash_by_ipsproto(src, dst, protonum); + LIST_FIND(&byipsproto[h], fake_cmp, struct ip_nat_hash *, + src, dst, protonum, &score, conntrack); + + return score; +} + +/* For [FUTURE] fragmentation handling, we want the least-used + src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus + if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports + 1-65535, we don't do pro-rata allocation based on ports; we choose + the ip with the lowest src-ip/dst-ip/proto usage. + + If an allocation then fails (eg. all 6 ports used in the 1.2.3.4 + range), we eliminate that and try again. This is not the most + efficient approach, but if you're worried about that, don't hand us + ranges you don't really have. */ +static struct ip_nat_range * +find_best_ips_proto(struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr, + const struct ip_conntrack *conntrack, + unsigned int hooknum) +{ + unsigned int i; + struct { + const struct ip_nat_range *range; + unsigned int score; + struct ip_conntrack_tuple tuple; + } best = { NULL, 0xFFFFFFFF }; + u_int32_t *var_ipp, *other_ipp, saved_ip, orig_dstip; + static unsigned int randomness = 0; + + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) { + var_ipp = &tuple->src.ip; + saved_ip = tuple->dst.ip; + other_ipp = &tuple->dst.ip; + } else { + var_ipp = &tuple->dst.ip; + saved_ip = tuple->src.ip; + other_ipp = &tuple->src.ip; + } + /* Don't do do_extra_mangle unless neccessary (overrides + explicit socket bindings, for example) */ + orig_dstip = tuple->dst.ip; + + IP_NF_ASSERT(mr->rangesize >= 1); + for (i = 0; i < mr->rangesize; i++) { + /* Host order */ + u_int32_t minip, maxip, j; + + /* Don't do ranges which are already eliminated. */ + if (mr->range[i].flags & IP_NAT_RANGE_FULL) { + continue; + } + + if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) { + minip = ntohl(mr->range[i].min_ip); + maxip = ntohl(mr->range[i].max_ip); + } else + minip = maxip = ntohl(*var_ipp); + + randomness++; + for (j = 0; j < maxip - minip + 1; j++) { + unsigned int score; + + *var_ipp = htonl(minip + (randomness + j) + % (maxip - minip + 1)); + + /* Reset the other ip in case it was mangled by + * do_extra_mangle last time. */ + *other_ipp = saved_ip; + +#ifdef CONFIG_IP_NF_NAT_LOCAL + if (hooknum == NF_IP_LOCAL_OUT + && *var_ipp != orig_dstip + && !do_extra_mangle(*var_ipp, other_ipp)) { + DEBUGP("Range %u %u.%u.%u.%u rt failed!\n", + i, NIPQUAD(*var_ipp)); + /* Can't route? This whole range part is + * probably screwed, but keep trying + * anyway. */ + continue; + } +#endif + + /* Count how many others map onto this. */ + score = count_maps(tuple->src.ip, tuple->dst.ip, + tuple->dst.protonum, conntrack); + if (score < best.score) { + /* Optimization: doesn't get any better than + this. */ + if (score == 0) + return (struct ip_nat_range *) + &mr->range[i]; + + best.score = score; + best.tuple = *tuple; + best.range = &mr->range[i]; + } + } + } + *tuple = best.tuple; + + /* Discard const. 
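The selection loop above visits every candidate IP in the range, but starts at an offset rotated by the static randomness counter so that consecutive connections do not all pile onto the first address; the candidate with the lowest count of existing src/dst/proto mappings wins, and a score of zero short-circuits the search. A user-space sketch of just the rotation and scoring (score_of() is a stub standing in for count_maps()):

#include <stdio.h>

/* stub: in ip_nat_core.c this is count_maps(), which counts existing
 * conntracks sharing the same src-ip/dst-ip/proto triple */
static unsigned int score_of(unsigned long ip) { return ip % 3; }

int main(void)
{
	unsigned long minip = 1, maxip = 6;	/* hypothetical range .1 - .6 */
	static unsigned int randomness;
	unsigned long j, best_ip = 0;
	unsigned int best = ~0U;

	randomness++;
	for (j = 0; j < maxip - minip + 1; j++) {
		/* rotated walk over the range, as in find_best_ips_proto() */
		unsigned long ip = minip + (randomness + j) % (maxip - minip + 1);
		unsigned int s = score_of(ip);

		if (s < best) {
			best = s;
			best_ip = ip;
			if (s == 0)
				break;	/* can't do better than unused */
		}
	}
	printf("picked host .%lu with score %u\n", best_ip, best);
	return 0;
}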
*/ + return (struct ip_nat_range *)best.range; +} + +/* Fast version doesn't iterate through hash chains, but only handles + common case of single IP address (null NAT, masquerade) */ +static struct ip_nat_range * +find_best_ips_proto_fast(struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr, + const struct ip_conntrack *conntrack, + unsigned int hooknum) +{ + if (mr->rangesize != 1 + || (mr->range[0].flags & IP_NAT_RANGE_FULL) + || ((mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) + && mr->range[0].min_ip != mr->range[0].max_ip)) + return find_best_ips_proto(tuple, mr, conntrack, hooknum); + + if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) + tuple->src.ip = mr->range[0].min_ip; + else { + /* Only do extra mangle when required (breaks + socket binding) */ +#ifdef CONFIG_IP_NF_NAT_LOCAL + if (tuple->dst.ip != mr->range[0].min_ip + && hooknum == NF_IP_LOCAL_OUT + && !do_extra_mangle(mr->range[0].min_ip, + &tuple->src.ip)) + return NULL; +#endif + tuple->dst.ip = mr->range[0].min_ip; + } + } + + /* Discard const. */ + return (struct ip_nat_range *)&mr->range[0]; +} + +static int +get_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig_tuple, + const struct ip_nat_multi_range *mrr, + struct ip_conntrack *conntrack, + unsigned int hooknum) +{ + struct ip_nat_protocol *proto + = find_nat_proto(orig_tuple->dst.protonum); + struct ip_nat_range *rptr; + unsigned int i; + int ret; + + /* We temporarily use flags for marking full parts, but we + always clean up afterwards */ + struct ip_nat_multi_range *mr = (void *)mrr; + + /* 1) If this srcip/proto/src-proto-part is currently mapped, + and that same mapping gives a unique tuple within the given + range, use that. + + This is only required for source (ie. NAT/masq) mappings. + So far, we don't do local source mappings, so multiple + manips not an issue. */ + if (hooknum == NF_IP_POST_ROUTING) { + struct ip_conntrack_manip *manip; + + manip = find_appropriate_src(orig_tuple, mr); + if (manip) { + /* Apply same source manipulation. */ + *tuple = ((struct ip_conntrack_tuple) + { *manip, orig_tuple->dst }); + DEBUGP("get_unique_tuple: Found current src map\n"); + return 1; + } + } + + /* 2) Select the least-used IP/proto combination in the given + range. + */ + *tuple = *orig_tuple; + while ((rptr = find_best_ips_proto_fast(tuple, mr, conntrack, hooknum)) + != NULL) { + DEBUGP("Found best for "); DUMP_TUPLE(tuple); + /* 3) The per-protocol part of the manip is made to + map into the range to make a unique tuple. */ + + /* Only bother mapping if it's not already in range + and unique */ + if ((!(rptr->flags & IP_NAT_RANGE_PROTO_SPECIFIED) + || proto->in_range(tuple, HOOK2MANIP(hooknum), + &rptr->min, &rptr->max)) + && !ip_nat_used_tuple(tuple, conntrack)) { + ret = 1; + goto clear_fulls; + } else { + if (proto->unique_tuple(tuple, rptr, + HOOK2MANIP(hooknum), + conntrack)) { + /* Must be unique. */ + IP_NF_ASSERT(!ip_nat_used_tuple(tuple, + conntrack)); + ret = 1; + goto clear_fulls; + } else if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) { + /* Try implicit source NAT; protocol + may be able to play with ports to + make it unique. */ + struct ip_nat_range r + = { IP_NAT_RANGE_MAP_IPS, + tuple->src.ip, tuple->src.ip, + { 0 }, { 0 } }; + DEBUGP("Trying implicit mapping\n"); + if (proto->unique_tuple(tuple, &r, + IP_NAT_MANIP_SRC, + conntrack)) { + /* Must be unique. 
*/ + IP_NF_ASSERT(!ip_nat_used_tuple + (tuple, conntrack)); + ret = 1; + goto clear_fulls; + } + } + DEBUGP("Protocol can't get unique tuple %u.\n", + hooknum); + } + + /* Eliminate that from range, and try again. */ + rptr->flags |= IP_NAT_RANGE_FULL; + *tuple = *orig_tuple; + } + + ret = 0; + + clear_fulls: + /* Clear full flags. */ + IP_NF_ASSERT(mr->rangesize >= 1); + for (i = 0; i < mr->rangesize; i++) + mr->range[i].flags &= ~IP_NAT_RANGE_FULL; + + return ret; +} + +static inline int +helper_cmp(const struct ip_nat_helper *helper, + const struct ip_conntrack_tuple *tuple) +{ + return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask); +} + +/* Where to manip the reply packets (will be reverse manip). */ +static unsigned int opposite_hook[NF_IP_NUMHOOKS] += { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING, + [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING, +#ifdef CONFIG_IP_NF_NAT_LOCAL + [NF_IP_LOCAL_OUT] = NF_IP_LOCAL_IN, + [NF_IP_LOCAL_IN] = NF_IP_LOCAL_OUT, +#endif +}; + +unsigned int +ip_nat_setup_info(struct ip_conntrack *conntrack, + const struct ip_nat_multi_range *mr, + unsigned int hooknum) +{ + struct ip_conntrack_tuple new_tuple, inv_tuple, reply; + struct ip_conntrack_tuple orig_tp; + struct ip_nat_info *info = &conntrack->nat.info; + int in_hashes = info->initialized; + + MUST_BE_WRITE_LOCKED(&ip_nat_lock); + IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING + || hooknum == NF_IP_POST_ROUTING + || hooknum == NF_IP_LOCAL_IN + || hooknum == NF_IP_LOCAL_OUT); + IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); + IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum)))); + + /* What we've got will look like inverse of reply. Normally + this is what is in the conntrack, except for prior + manipulations (future optimization: if num_manips == 0, + orig_tp = + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ + invert_tuplepr(&orig_tp, + &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple); + +#if 0 + { + unsigned int i; + + DEBUGP("Hook %u (%s), ", hooknum, + HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST"); + DUMP_TUPLE(&orig_tp); + DEBUGP("Range %p: ", mr); + for (i = 0; i < mr->rangesize; i++) { + DEBUGP("%u:%s%s%s %u.%u.%u.%u - %u.%u.%u.%u %u - %u\n", + i, + (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) + ? " MAP_IPS" : "", + (mr->range[i].flags + & IP_NAT_RANGE_PROTO_SPECIFIED) + ? " PROTO_SPECIFIED" : "", + (mr->range[i].flags & IP_NAT_RANGE_FULL) + ? " FULL" : "", + NIPQUAD(mr->range[i].min_ip), + NIPQUAD(mr->range[i].max_ip), + mr->range[i].min.all, + mr->range[i].max.all); + } + } +#endif + + do { + if (!get_unique_tuple(&new_tuple, &orig_tp, mr, conntrack, + hooknum)) { + DEBUGP("ip_nat_setup_info: Can't get unique for %p.\n", + conntrack); + return NF_DROP; + } + +#if 0 + DEBUGP("Hook %u (%s) %p\n", hooknum, + HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST", + conntrack); + DEBUGP("Original: "); + DUMP_TUPLE(&orig_tp); + DEBUGP("New: "); + DUMP_TUPLE(&new_tuple); +#endif + + /* We now have two tuples (SRCIP/SRCPT/DSTIP/DSTPT): + the original (A/B/C/D') and the mangled one (E/F/G/H'). + + We're only allowed to work with the SRC per-proto + part, so we create inverses of both to start, then + derive the other fields we need. */ + + /* Reply connection: simply invert the new tuple + (G/H/E/F') */ + invert_tuplepr(&reply, &new_tuple); + + /* Alter conntrack table so it recognizes replies. + If fail this race (reply tuple now used), repeat. 
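The tuple algebra in this function is easier to follow with concrete numbers; the worked SNAT example below (every address and port is hypothetical) tracks the four tuples the code derives:

/* Worked SNAT example for the derivation above:
 *
 *   orig_tp   (A/B/C/D): 10.0.0.5:1024     -> 198.51.100.7:80
 *   new_tuple (E/F/G/H): 203.0.113.9:40000 -> 198.51.100.7:80
 *   reply     (G/H/E/F): 198.51.100.7:80   -> 203.0.113.9:40000
 *   inv_tuple (C/D/A/B): 198.51.100.7:80   -> 10.0.0.5:1024
 *
 * Only the source changed (A/B != E/F), so two manips are recorded:
 *   ORIGINAL dir, this hook:  SRC manip to E/F (203.0.113.9:40000)
 *   REPLY dir, opposite hook: DST manip to A/B (10.0.0.5:1024)
 * The destination is unchanged, so the second pair of manips
 * (DST in ORIGINAL, SRC in REPLY) is skipped.
 */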
*/ + } while (!ip_conntrack_alter_reply(conntrack, &reply)); + + /* FIXME: We can simply use the existing conntrack reply tuple + here --RR */ + /* Create inverse of original: C/D/A/B' */ + invert_tuplepr(&inv_tuple, &orig_tp); + + /* Has source changed? */ + if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) { + /* In this direction, a source manip. */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_ORIGINAL, hooknum, + IP_NAT_MANIP_SRC, new_tuple.src }); + + IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); + + /* In the reverse direction, a destination manip. */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_REPLY, opposite_hook[hooknum], + IP_NAT_MANIP_DST, orig_tp.src }); + IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS); + } + + /* Has destination changed? */ + if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) { + /* In this direction, a destination manip */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_ORIGINAL, hooknum, + IP_NAT_MANIP_DST, reply.src }); + + IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); + + /* In the reverse direction, a source manip. */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_REPLY, opposite_hook[hooknum], + IP_NAT_MANIP_SRC, inv_tuple.src }); + IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS); + } + + /* If there's a helper, assign it; based on new tuple. */ + if (!conntrack->master) + info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, + &reply); + + /* It's done. */ + info->initialized |= (1 << HOOK2MANIP(hooknum)); + + if (in_hashes) { + IP_NF_ASSERT(info->bysource.conntrack); + replace_in_hashes(conntrack, info); + } else { + place_in_hashes(conntrack, info); + } + + return NF_ACCEPT; +} + +void replace_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info) +{ + /* Source has changed, so replace in hashes. */ + unsigned int srchash + = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + /* We place packet as seen OUTGOING in byips_proto hash + (ie. reverse dst and src of reply packet). */ + unsigned int ipsprotohash + = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.src.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.protonum); + + IP_NF_ASSERT(info->bysource.conntrack == conntrack); + MUST_BE_WRITE_LOCKED(&ip_nat_lock); + + list_del(&info->bysource.list); + list_del(&info->byipsproto.list); + + list_prepend(&bysource[srchash], &info->bysource); + list_prepend(&byipsproto[ipsprotohash], &info->byipsproto); +} + +void place_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info) +{ + unsigned int srchash + = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + /* We place packet as seen OUTGOING in byips_proto hash + (ie. reverse dst and src of reply packet). 
*/ + unsigned int ipsprotohash + = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.src.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.protonum); + + IP_NF_ASSERT(!info->bysource.conntrack); + + MUST_BE_WRITE_LOCKED(&ip_nat_lock); + info->byipsproto.conntrack = conntrack; + info->bysource.conntrack = conntrack; + + list_prepend(&bysource[srchash], &info->bysource); + list_prepend(&byipsproto[ipsprotohash], &info->byipsproto); +} + +static void +manip_pkt(u_int16_t proto, struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype, + __u32 *nfcache) +{ + *nfcache |= NFC_ALTERED; + find_nat_proto(proto)->manip_pkt(iph, len, manip, maniptype); + + if (maniptype == IP_NAT_MANIP_SRC) { + iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip, + iph->check); + iph->saddr = manip->ip; + } else { + iph->check = ip_nat_cheat_check(~iph->daddr, manip->ip, + iph->check); + iph->daddr = manip->ip; + } +#if 0 + if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) + DEBUGP("IP: checksum on packet bad.\n"); + + if (proto == IPPROTO_TCP) { + void *th = (u_int32_t *)iph + iph->ihl; + if (tcp_v4_check(th, len - 4*iph->ihl, iph->saddr, iph->daddr, + csum_partial((char *)th, len-4*iph->ihl, 0))) + DEBUGP("TCP: checksum on packet bad\n"); + } +#endif +} + +static inline int exp_for_packet(struct ip_conntrack_expect *exp, + struct sk_buff **pskb) +{ + struct ip_conntrack_protocol *proto; + int ret = 1; + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + proto = __ip_ct_find_proto((*pskb)->nh.iph->protocol); + if (proto->exp_matches_pkt) + ret = proto->exp_matches_pkt(exp, pskb); + + return ret; +} + +/* Do packet manipulations according to binding. */ +unsigned int +do_bindings(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct ip_nat_info *info, + unsigned int hooknum, + struct sk_buff **pskb) +{ + unsigned int i; + struct ip_nat_helper *helper; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + int is_tcp = (*pskb)->nh.iph->protocol == IPPROTO_TCP; + + /* Need nat lock to protect against modification, but neither + the conntrack (referenced) nor the helper (deleted with + synchronize_bh()) can vanish. */ + READ_LOCK(&ip_nat_lock); + for (i = 0; i < info->num_manips; i++) { + /* raw socket (tcpdump) may have clone of incoming + skb: don't disturb it --RR */ + if (skb_cloned(*pskb) && !(*pskb)->sk) { + struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) { + READ_UNLOCK(&ip_nat_lock); + return NF_DROP; + } + kfree_skb(*pskb); + *pskb = nskb; + } + + if (info->manips[i].direction == dir + && info->manips[i].hooknum == hooknum) { + DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n", + *pskb, + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? 
"SRC" : "DST", + NIPQUAD(info->manips[i].manip.ip), + htons(info->manips[i].manip.u.all)); + manip_pkt((*pskb)->nh.iph->protocol, + (*pskb)->nh.iph, + (*pskb)->len, + &info->manips[i].manip, + info->manips[i].maniptype, + &(*pskb)->nfcache); + } + } + helper = info->helper; + READ_UNLOCK(&ip_nat_lock); + + if (helper) { + struct ip_conntrack_expect *exp = NULL; + struct list_head *cur_item; + int ret = NF_ACCEPT; + int helper_called = 0; + + DEBUGP("do_bindings: helper existing for (%p)\n", ct); + + /* Always defragged for helpers */ + IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off + & htons(IP_MF|IP_OFFSET))); + + /* Have to grab read lock before sibling_list traversal */ + READ_LOCK(&ip_conntrack_lock); + list_for_each_prev(cur_item, &ct->sibling_list) { + exp = list_entry(cur_item, struct ip_conntrack_expect, + expected_list); + + /* if this expectation is already established, skip */ + if (exp->sibling) + continue; + + if (exp_for_packet(exp, pskb)) { + /* FIXME: May be true multiple times in the + * case of UDP!! */ + DEBUGP("calling nat helper (exp=%p) for packet\n", exp); + ret = helper->help(ct, exp, info, ctinfo, + hooknum, pskb); + if (ret != NF_ACCEPT) { + READ_UNLOCK(&ip_conntrack_lock); + return ret; + } + helper_called = 1; + } + } + /* Helper might want to manip the packet even when there is no + * matching expectation for this packet */ + if (!helper_called && helper->flags & IP_NAT_HELPER_F_ALWAYS) { + DEBUGP("calling nat helper for packet without expectation\n"); + ret = helper->help(ct, NULL, info, ctinfo, + hooknum, pskb); + if (ret != NF_ACCEPT) { + READ_UNLOCK(&ip_conntrack_lock); + return ret; + } + } + READ_UNLOCK(&ip_conntrack_lock); + + /* Adjust sequence number only once per packet + * (helper is called at all hooks) */ + if (is_tcp && (hooknum == NF_IP_POST_ROUTING + || hooknum == NF_IP_LOCAL_IN)) { + DEBUGP("ip_nat_core: adjusting sequence number\n"); + /* future: put this in a l4-proto specific function, + * and call this function here. */ + ip_nat_seq_adjust(*pskb, ct, ctinfo); + } + + return ret; + + } else + return NF_ACCEPT; + + /* not reached */ +} + +unsigned int +icmp_reply_translation(struct sk_buff *skb, + struct ip_conntrack *conntrack, + unsigned int hooknum, + int dir) +{ + struct iphdr *iph = skb->nh.iph; + struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl); + struct iphdr *inner = (struct iphdr *)(hdr + 1); + size_t datalen = skb->len - ((void *)inner - (void *)iph); + unsigned int i; + struct ip_nat_info *info = &conntrack->nat.info; + + IP_NF_ASSERT(skb->len >= iph->ihl*4 + sizeof(struct icmphdr)); + /* Must be RELATED */ + IP_NF_ASSERT(skb->nfct + - ((struct ip_conntrack *)skb->nfct->master)->infos + == IP_CT_RELATED + || skb->nfct + - ((struct ip_conntrack *)skb->nfct->master)->infos + == IP_CT_RELATED+IP_CT_IS_REPLY); + + /* Redirects on non-null nats must be dropped, else they'll + start talking to each other without our translation, and be + confused... --RR */ + if (hdr->type == ICMP_REDIRECT) { + /* Don't care about races here. */ + if (info->initialized + != ((1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST)) + || info->num_manips != 0) + return NF_DROP; + } + + DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n", + skb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); + /* Note: May not be from a NAT'd host, but probably safest to + do translation always as if it came from the host itself + (even though a "host unreachable" coming from the host + itself is a bit weird). 
+ + More explanation: some people use NAT for anonymizing. + Also, CERT recommends dropping all packets from private IP + addresses (although ICMP errors from internal links with + such addresses are not too uncommon, as Alan Cox points + out) */ + + READ_LOCK(&ip_nat_lock); + for (i = 0; i < info->num_manips; i++) { + DEBUGP("icmp_reply: manip %u dir %s hook %u\n", + i, info->manips[i].direction == IP_CT_DIR_ORIGINAL ? + "ORIG" : "REPLY", info->manips[i].hooknum); + + if (info->manips[i].direction != dir) + continue; + + /* Mapping the inner packet is just like a normal + packet, except it was never src/dst reversed, so + where we would normally apply a dst manip, we apply + a src, and vice versa. */ + if (info->manips[i].hooknum == hooknum) { + DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n", + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "DST" : "SRC", + NIPQUAD(info->manips[i].manip.ip), + ntohs(info->manips[i].manip.u.udp.port)); + manip_pkt(inner->protocol, inner, + skb->len - ((void *)inner - (void *)iph), + &info->manips[i].manip, + !info->manips[i].maniptype, + &skb->nfcache); + /* Outer packet needs to have IP header NATed like + it's a reply. */ + + /* Use mapping to map outer packet: 0 gives no + per-proto mapping */ + DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n", + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "SRC" : "DST", + NIPQUAD(info->manips[i].manip.ip)); + manip_pkt(0, iph, skb->len, + &info->manips[i].manip, + info->manips[i].maniptype, + &skb->nfcache); + } + } + READ_UNLOCK(&ip_nat_lock); + + /* Since we mangled inside ICMP packet, recalculate its + checksum from scratch. (Hence the handling of incorrect + checksums in conntrack, so we don't accidentally fix one.) */ + hdr->checksum = 0; + hdr->checksum = ip_compute_csum((unsigned char *)hdr, + sizeof(*hdr) + datalen); + + return NF_ACCEPT; +} + +int __init ip_nat_init(void) +{ + size_t i; + + /* Leave them the same for the moment. */ + ip_nat_htable_size = ip_conntrack_htable_size; + + /* One vmalloc for both hash tables */ + bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size*2); + if (!bysource) { + return -ENOMEM; + } + byipsproto = bysource + ip_nat_htable_size; + + /* Sew in builtin protocols. */ + WRITE_LOCK(&ip_nat_lock); + list_append(&protos, &ip_nat_protocol_tcp); + list_append(&protos, &ip_nat_protocol_udp); + list_append(&protos, &ip_nat_protocol_icmp); + WRITE_UNLOCK(&ip_nat_lock); + + for (i = 0; i < ip_nat_htable_size; i++) { + INIT_LIST_HEAD(&bysource[i]); + INIT_LIST_HEAD(&byipsproto[i]); + } + + /* FIXME: Man, this is a hack. <SIGH> */ + IP_NF_ASSERT(ip_conntrack_destroyed == NULL); + ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; + + return 0; +} + +/* Clear NAT section of all conntracks, in case we're loaded again. */ +static int clean_nat(const struct ip_conntrack *i, void *data) +{ + memset((void *)&i->nat, 0, sizeof(i->nat)); + return 0; +} + +/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */ +void ip_nat_cleanup(void) +{ + ip_ct_selective_cleanup(&clean_nat, NULL); + ip_conntrack_destroyed = NULL; + vfree(bysource); +} diff -uNr linux_org/net/ipv4/netfilter/ip_nat_pptp.c linux/net/ipv4/netfilter/ip_nat_pptp.c --- linux_org/net/ipv4/netfilter/ip_nat_pptp.c 1970-01-01 01:00:00.000000000 +0100 +++ linux/net/ipv4/netfilter/ip_nat_pptp.c 2006-10-27 14:11:52.000000000 +0200 @@ -0,0 +1,475 @@ +/* + * ip_nat_pptp.c - Version 1.5 + * + * NAT support for PPTP (Point to Point Tunneling Protocol). 
+ * PPTP is a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637. + * + * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * TODO: - Support for multiple calls within one session + * (needs netfilter newnat code) + * - NAT to a unique tuple, not to TCP source port + * (needs netfilter tuple reservation) + * + * Changes: + * 2002-02-10 - Version 1.3 + * - Use ip_nat_mangle_tcp_packet() because of cloned skb's + * in local connections (Philip Craig <philipc@snapgear.com>) + * - add checks for magicCookie and pptp version + * - make argument list of pptp_{out,in}bound_packet() shorter + * - move to C99 style initializers + * - print version number at module loadtime + * 2003-09-22 - Version 1.5 + * - use SNATed tcp sourceport as callid, since we get called before + * TCP header is mangled (Philip Craig <philipc@snapgear.com>) + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <net/tcp.h> +#include <linux/netfilter_ipv4/ip_nat.h> +#include <linux/netfilter_ipv4/ip_nat_rule.h> +#include <linux/netfilter_ipv4/ip_nat_helper.h> +#include <linux/netfilter_ipv4/ip_nat_pptp.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> + +#define IP_NAT_PPTP_VERSION "1.5" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); + + +#if 0 +#include "ip_conntrack_pptp_priv.h" +#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \ + ": " format, ## args) +#else +#define DEBUGP(format, args...) 
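+/* non-debug build: DEBUGP() expands to nothing, so all debug printks compile away */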
+#endif + +static unsigned int +pptp_nat_expected(struct sk_buff **pskb, + unsigned int hooknum, + struct ip_conntrack *ct, + struct ip_nat_info *info) +{ + struct ip_conntrack *master = master_ct(ct); + struct ip_nat_multi_range mr; + struct ip_ct_pptp_master *ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info; + u_int32_t newip, newcid; + int ret; + + IP_NF_ASSERT(info); + IP_NF_ASSERT(master); + IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum)))); + + DEBUGP("we have a connection!\n"); + + LOCK_BH(&ip_pptp_lock); + ct_pptp_info = &master->help.ct_pptp_info; + nat_pptp_info = &master->nat.help.nat_pptp_info; + + /* need to alter GRE tuple because conntrack expectfn() used 'wrong' + * (unmanipulated) values */ + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) { + DEBUGP("completing tuples with NAT info \n"); + /* we can do this, since we're unconfirmed */ + if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == + htonl(ct_pptp_info->pac_call_id)) { + /* assume PNS->PAC */ + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key = + htonl(nat_pptp_info->pns_call_id); + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key = + htonl(nat_pptp_info->pns_call_id); + newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + newcid = htonl(nat_pptp_info->pac_call_id); + } else { + /* assume PAC->PNS */ + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key = + htonl(nat_pptp_info->pac_call_id); + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key = + htonl(nat_pptp_info->pac_call_id); + newip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + newcid = htonl(nat_pptp_info->pns_call_id); + } + } else { + if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == + htonl(ct_pptp_info->pac_call_id)) { + /* assume PNS->PAC */ + newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + newcid = htonl(ct_pptp_info->pns_call_id); + } + else { + /* assume PAC->PNS */ + newip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + newcid = htonl(ct_pptp_info->pac_call_id); + } + } + + mr.rangesize = 1; + mr.range[0].flags = IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED; + mr.range[0].min_ip = mr.range[0].max_ip = newip; + mr.range[0].min = mr.range[0].max = + ((union ip_conntrack_manip_proto ) { newcid }); + DEBUGP("change ip to %u.%u.%u.%u\n", + NIPQUAD(newip)); + DEBUGP("change key to 0x%x\n", ntohl(newcid)); + ret = ip_nat_setup_info(ct, &mr, hooknum); + + UNLOCK_BH(&ip_pptp_lock); + + return ret; + +} + +/* outbound packets == from PNS to PAC */ +static inline unsigned int +pptp_outbound_pkt(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct ip_conntrack_expect *exp) + +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct tcphdr *tcph = (void *) iph + iph->ihl*4; + struct pptp_pkt_hdr *pptph = (struct pptp_pkt_hdr *) + ((void *)tcph + tcph->doff*4); + + struct PptpControlHeader *ctlh; + union pptp_ctrl_union pptpReq; + struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + + u_int16_t msg, *cid = NULL, new_callid; + + /* FIXME: size checks !!! */ + ctlh = (struct PptpControlHeader *) ((void *) pptph + sizeof(*pptph)); + pptpReq.rawreq = (void *) ((void *) ctlh + sizeof(*ctlh)); + + new_callid = htons(ct_pptp_info->pns_call_id); + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REQUEST: + cid = &pptpReq.ocreq->callID; + /* FIXME: ideally we would want to reserve a call ID + * here. 
current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = ntohs(new_callid); + break; + case PPTP_IN_CALL_REPLY: + cid = &pptpReq.icreq->callID; + break; + case PPTP_CALL_CLEAR_REQUEST: + cid = &pptpReq.clrreq->callID; + break; + default: + DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, + (msg <= PPTP_MSG_MAX)? strMName[msg]:strMName[0]); + /* fall through */ + + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + IP_NF_ASSERT(cid); + + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(*cid), ntohs(new_callid)); + + /* mangle packet */ + ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, (void *)cid - (void *)pptph, + sizeof(new_callid), (char *)&new_callid, + sizeof(new_callid)); + + return NF_ACCEPT; +} + +/* inbound packets == from PAC to PNS */ +static inline unsigned int +pptp_inbound_pkt(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct ip_conntrack_expect *oldexp) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct tcphdr *tcph = (void *) iph + iph->ihl*4; + struct pptp_pkt_hdr *pptph = (struct pptp_pkt_hdr *) + ((void *)tcph + tcph->doff*4); + + struct PptpControlHeader *ctlh; + union pptp_ctrl_union pptpReq; + struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + + u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL; + u_int32_t old_dst_ip; + + struct ip_conntrack_tuple t, inv_t; + struct ip_conntrack_tuple *orig_t, *reply_t; + + /* FIXME: size checks !!! 
*/ + ctlh = (struct PptpControlHeader *) ((void *) pptph + sizeof(*pptph)); + pptpReq.rawreq = (void *) ((void *) ctlh + sizeof(*ctlh)); + + new_pcid = htons(nat_pptp_info->pns_call_id); + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REPLY: + pcid = &pptpReq.ocack->peersCallID; + cid = &pptpReq.ocack->callID; + if (!oldexp) { + DEBUGP("outcall but no expectation\n"); + break; + } + old_dst_ip = oldexp->tuple.dst.ip; + t = oldexp->tuple; + invert_tuplepr(&inv_t, &t); + + /* save original PAC call ID in nat_info */ + nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; + + /* alter expectation */ + orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + if (t.src.ip == orig_t->src.ip && t.dst.ip == orig_t->dst.ip) { + /* expectation for PNS->PAC direction */ + t.src.u.gre.key = htonl(nat_pptp_info->pns_call_id); + t.dst.u.gre.key = htonl(ct_pptp_info->pac_call_id); + inv_t.src.ip = reply_t->src.ip; + inv_t.dst.ip = reply_t->dst.ip; + inv_t.src.u.gre.key = htonl(nat_pptp_info->pac_call_id); + inv_t.dst.u.gre.key = htonl(ct_pptp_info->pns_call_id); + } else { + /* expectation for PAC->PNS direction */ + t.src.u.gre.key = htonl(nat_pptp_info->pac_call_id); + t.dst.u.gre.key = htonl(ct_pptp_info->pns_call_id); + inv_t.src.ip = orig_t->src.ip; + inv_t.dst.ip = orig_t->dst.ip; + inv_t.src.u.gre.key = htonl(nat_pptp_info->pns_call_id); + inv_t.dst.u.gre.key = htonl(ct_pptp_info->pac_call_id); + } + + if (!ip_conntrack_change_expect(oldexp, &t)) { + DEBUGP("successfully changed expect\n"); + } else { + DEBUGP("can't change expect\n"); + } + ip_ct_gre_keymap_change(oldexp->proto.gre.keymap_orig, &t); + ip_ct_gre_keymap_change(oldexp->proto.gre.keymap_reply, &inv_t); + break; + case PPTP_IN_CALL_CONNECT: + pcid = &pptpReq.iccon->peersCallID; + if (!oldexp) + break; + old_dst_ip = oldexp->tuple.dst.ip; + t = oldexp->tuple; + + /* alter expectation, no need for callID */ + if (t.dst.ip == ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip) { + /* expectation for PNS->PAC direction */ + t.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + } else { + /* expectation for PAC->PNS direction */ + t.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + } + + if (!ip_conntrack_change_expect(oldexp, &t)) { + DEBUGP("successfully changed expect\n"); + } else { + DEBUGP("can't change expect\n"); + } + break; + case PPTP_IN_CALL_REQUEST: + /* only need to nat in case PAC is behind NAT box */ + break; + case PPTP_WAN_ERROR_NOTIFY: + pcid = &pptpReq.wanerr->peersCallID; + break; + case PPTP_CALL_DISCONNECT_NOTIFY: + pcid = &pptpReq.disc->callID; + break; + + default: + DEBUGP("unknown inbound packet %s\n", + (msg <= PPTP_MSG_MAX)? 
strMName[msg]:strMName[0]); + /* fall through */ + + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* mangle packet */ + IP_NF_ASSERT(pcid); + DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", + ntohs(*pcid), ntohs(new_pcid)); + ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, (void *)pcid - (void *)pptph, + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid)); + + if (new_cid) { + IP_NF_ASSERT(cid); + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(*cid), ntohs(new_cid)); + ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)cid - (void *)pptph, + sizeof(new_cid), (char *)&new_cid, + sizeof(new_cid)); + } + + /* great, at least we don't need to resize packets */ + return NF_ACCEPT; +} + + +static unsigned int tcp_help(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp, + struct ip_nat_info *info, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, struct sk_buff **pskb) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct tcphdr *tcph = (void *) iph + iph->ihl*4; + unsigned int datalen = (*pskb)->len - iph->ihl*4 - tcph->doff*4; + struct pptp_pkt_hdr *pptph; + + int dir; + + DEBUGP("entering\n"); + + /* Only mangle things once: DST for original direction + and SRC for reply direction. */ + dir = CTINFO2DIR(ctinfo); + if (!((HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC + && dir == IP_CT_DIR_ORIGINAL) + || (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST + && dir == IP_CT_DIR_REPLY))) { + DEBUGP("Not touching dir %s at hook %s\n", + dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY", + hooknum == NF_IP_POST_ROUTING ? "POSTROUTING" + : hooknum == NF_IP_PRE_ROUTING ? "PREROUTING" + : hooknum == NF_IP_LOCAL_OUT ? "OUTPUT" + : hooknum == NF_IP_LOCAL_IN ? 
"INPUT" : "???"); + return NF_ACCEPT; + } + + /* if packet is too small, just skip it */ + if (datalen < sizeof(struct pptp_pkt_hdr)+ + sizeof(struct PptpControlHeader)) { + DEBUGP("pptp packet too short\n"); + return NF_ACCEPT; + } + + pptph = (struct pptp_pkt_hdr *) ((void *)tcph + tcph->doff*4); + + /* if it's not a control message, we can't handle it */ + if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || + ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { + DEBUGP("not a pptp control packet\n"); + return NF_ACCEPT; + } + + LOCK_BH(&ip_pptp_lock); + + if (dir == IP_CT_DIR_ORIGINAL) { + /* reuqests sent by client to server (PNS->PAC) */ + pptp_outbound_pkt(pskb, ct, ctinfo, exp); + } else { + /* response from the server to the client (PAC->PNS) */ + pptp_inbound_pkt(pskb, ct, ctinfo, exp); + } + + UNLOCK_BH(&ip_pptp_lock); + + return NF_ACCEPT; +} + +/* nat helper struct for control connection */ +static struct ip_nat_helper pptp_tcp_helper = { + .list = { NULL, NULL }, + .name = "pptp", + .flags = IP_NAT_HELPER_F_ALWAYS, + .me = THIS_MODULE, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } + } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = IPPROTO_TCP + } + }, + + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = 0xFFFF } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = 0xFFFF + } + }, + .help = tcp_help, + .expect = pptp_nat_expected +}; + + +static int __init init(void) +{ + DEBUGP("%s: registering NAT helper\n", __FILE__); + if (ip_nat_helper_register(&pptp_tcp_helper)) { + printk(KERN_ERR "Unable to register NAT application helper " + "for pptp\n"); + return -EIO; + } + + printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION); + return 0; +} + +static void __exit fini(void) +{ + DEBUGP("cleanup_module\n" ); + ip_nat_helper_unregister(&pptp_tcp_helper); + printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION); +} + +module_init(init); +module_exit(fini); diff -uNr linux_org/net/ipv4/netfilter/ip_nat_proto_gre.c linux/net/ipv4/netfilter/ip_nat_proto_gre.c --- linux_org/net/ipv4/netfilter/ip_nat_proto_gre.c 1970-01-01 01:00:00.000000000 +0100 +++ linux/net/ipv4/netfilter/ip_nat_proto_gre.c 2006-10-27 14:11:52.000000000 +0200 @@ -0,0 +1,225 @@ +/* + * ip_nat_proto_gre.c - Version 1.2 + * + * NAT protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. 
+ * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/netfilter_ipv4/ip_nat.h> +#include <linux/netfilter_ipv4/ip_nat_rule.h> +#include <linux/netfilter_ipv4/ip_nat_protocol.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \ + ": " format, ## args) +#else +#define DEBUGP(x, args...) +#endif + +/* is key in given range between min and max */ +static int +gre_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + u_int32_t key; + + if (maniptype == IP_NAT_MANIP_SRC) + key = tuple->src.u.gre.key; + else + key = tuple->dst.u.gre.key; + + return ntohl(key) >= ntohl(min->gre.key) + && ntohl(key) <= ntohl(max->gre.key); +} + +/* generate unique tuple ... */ +static int +gre_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + u_int32_t min, i, range_size; + u_int32_t key = 0, *keyptr; + + if (maniptype == IP_NAT_MANIP_SRC) + keyptr = &tuple->src.u.gre.key; + else + keyptr = &tuple->dst.u.gre.key; + + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + + switch (tuple->dst.u.gre.version) { + case 0: + DEBUGP("NATing GRE version 0 (ct=%p)\n", + conntrack); + min = 1; + range_size = 0xffffffff; + break; + case GRE_VERSION_PPTP: + DEBUGP("%p: NATing GRE PPTP\n", + conntrack); + min = 1; + range_size = 0xffff; + break; + default: + printk(KERN_WARNING "nat_gre: unknown GRE version\n"); + return 0; + break; + } + + } else { + min = ntohl(range->min.gre.key); + range_size = ntohl(range->max.gre.key) - min + 1; + } + + DEBUGP("min = %u, range_size = %u\n", min, range_size); + + for (i = 0; i < range_size; i++, key++) { + *keyptr = htonl(min + key % range_size); + if (!ip_nat_used_tuple(tuple, conntrack)) + return 1; + } + + DEBUGP("%p: no NAT mapping\n", conntrack); + + return 0; +} + +/* manipulate a GRE packet according to maniptype */ +static void +gre_manip_pkt(struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype) +{ + struct gre_hdr *greh = (struct gre_hdr *)((u_int32_t *)iph+iph->ihl); + struct gre_hdr_pptp *pgreh = (struct gre_hdr_pptp *) greh; + + /* we only have destination manip of a packet, since 'source key' + * is not present in the packet itself */ + if (maniptype == IP_NAT_MANIP_DST) { + /* key manipulation is always dest */ + switch (greh->version) { + case 0: + if (!greh->key) { + DEBUGP("can't nat GRE w/o key\n"); + break; + } + if (greh->csum) { + /* FIXME: Never tested this code... 
*/ + *(gre_csum(greh)) = + ip_nat_cheat_check(~*(gre_key(greh)), + manip->u.gre.key, + *(gre_csum(greh))); + } + *(gre_key(greh)) = manip->u.gre.key; + break; + case GRE_VERSION_PPTP: + DEBUGP("call_id -> 0x%04x\n", + ntohl(manip->u.gre.key)); + pgreh->call_id = htons(ntohl(manip->u.gre.key)); + break; + default: + DEBUGP("can't nat unknown GRE version\n"); + break; + } + } +} + +/* print out a nat tuple */ +static unsigned int +gre_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + unsigned int len = 0; + + if (mask->dst.u.gre.version) + len += sprintf(buffer + len, "version=%d ", + ntohs(match->dst.u.gre.version)); + + if (mask->dst.u.gre.protocol) + len += sprintf(buffer + len, "protocol=0x%x ", + ntohs(match->dst.u.gre.protocol)); + + if (mask->src.u.gre.key) + len += sprintf(buffer + len, "srckey=0x%x ", + ntohl(match->src.u.gre.key)); + + if (mask->dst.u.gre.key) + len += sprintf(buffer + len, "dstkey=0x%x ", + ntohl(match->dst.u.gre.key)); + + return len; +} + +/* print a range of keys */ +static unsigned int +gre_print_range(char *buffer, const struct ip_nat_range *range) +{ + if (range->min.gre.key != 0 + || range->max.gre.key != 0xFFFF) { + if (range->min.gre.key == range->max.gre.key) + return sprintf(buffer, "key 0x%x ", + ntohl(range->min.gre.key)); + else + return sprintf(buffer, "keys 0x%x-0x%x ", + ntohl(range->min.gre.key), + ntohl(range->max.gre.key)); + } else + return 0; +} + +/* nat protocol struct */ +static struct ip_nat_protocol gre = + { { NULL, NULL }, "GRE", IPPROTO_GRE, + gre_manip_pkt, + gre_in_range, + gre_unique_tuple, + gre_print, + gre_print_range + }; + +static int __init init(void) +{ + if (ip_nat_protocol_register(&gre)) + return -EIO; + + return 0; +} + +static void __exit fini(void) +{ + ip_nat_protocol_unregister(&gre); +} + +module_init(init); +module_exit(fini);
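
A note on the key selection performed by gre_unique_tuple() above: it simply walks the GRE key / PPTP call-ID space starting at the range minimum, wrapping modulo the range size, until ip_nat_used_tuple() reports an unused tuple. The following standalone sketch shows the same search strategy in plain userspace C. It is an illustration only, not kernel code: tuple_in_use() is a hypothetical stand-in for ip_nat_used_tuple(), and the pre-populated call IDs are invented for the example.

/*
 * Minimal userspace sketch of the search loop in gre_unique_tuple():
 * try min + key % range_size for every offset in the range until an
 * unused value is found.
 */
#include <stdio.h>
#include <stdint.h>

/* pretend these call IDs already belong to other NATed PPTP calls */
static int tuple_in_use(uint32_t call_id)
{
	return call_id >= 1 && call_id <= 3;
}

/* mirror the PPTP case above: min = 1, range_size = 0xffff */
static uint32_t pick_gre_key(void)
{
	const uint32_t min = 1, range_size = 0xffff;
	uint32_t i, key;

	for (i = 0, key = 0; i < range_size; i++, key++) {
		uint32_t cand = min + key % range_size;
		if (!tuple_in_use(cand))
			return cand;	/* first free key wins */
	}
	return 0;	/* range exhausted: NAT setup would fail */
}

int main(void)
{
	printf("chosen call ID: %u\n", pick_gre_key());
	return 0;
}

Run as-is, the sketch prints "chosen call ID: 4", the first ID not already taken; the kernel loop settles on a free GRE key for a newly NATed PPTP call in the same way. Since no reservation is made (see the TODO in ip_nat_pptp.c), two calls racing through this search could still pick the same ID.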