%%% ToC #278, (v003/a006)

\documentclass{toc}

\tocdetails{%
   volume=3, number=6, year=2007, firstpage=103,
   received={November 7, 2006}, 
   revised={July 19, 2007},
   published={August 6, 2007}
}

\newcommand{\clique}{{\sc Max Clique}}
\newcommand{\cnum}{{\sc Chromatic Number}}
\newcommand{\iset}{{\sc Max Independent Set }}
\newcommand{\eps}{\epsilon}
\newcommand{\set}[1]{\{ #1 \}}
\newcommand{\angles}[1]{\langle #1 \rangle}
\newcommand{\zo}{\{0,1\}}
\newcommand{\zod}{\{0,1\}^d}
\newcommand{\zom}{\{0,1\}^m}
\newcommand{\zon}{\{0,1\}^n}
\newcommand{\F}{\mathbb{F}}

\DeclareMathOperator{\plog}{polylog}
\DeclareMathOperator{\p}{P}
\DeclareMathOperator{\quasip}{\rm \tilde{P}}
\DeclareMathOperator{\np}{NP}
\DeclareMathOperator{\quasinp}{N\tilde{P}}
\DeclareMathOperator{\zpp}{ZPP}

\DeclareMathOperator{\ext}{Ext}
\DeclareMathOperator{\dtime}{TIME}
\DeclareMathOperator{\zptime}{ZPTIME}
\DeclareMathOperator{\fpcp}{FPCP}
\DeclareMathOperator{\bin}{bin}
\DeclareMathOperator{\dis}{DIS}
\DeclareMathOperator{\supp}{supp}

\newcommand{\zpq}{\zptime(2^{\plog(n)})}

\runningauthor{D. Zuckerman}
\runningtitle{Linear Degree Extractors and Inapproximability}
\copyrightauthor{David Zuckerman}

\begin{document}

\begin{frontmatter}[classification=float]
\title{Linear Degree Extractors and the Inapproximability of Max Clique and Chromatic Number}
\tocpdftitle{Linear Degree Extractors and Inapproximability}
\tocpdfauthor{David Zuckerman}

\author[david]{David Zuckerman\thanks{
 Most of this work was done while visiting Harvard University, and was
supported in part by 
a Radcliffe Institute for Advanced Study Fellowship, 
a John Simon Guggenheim Memorial Foundation Fellowship,
a David and Lucile Packard Fellowship for Science and Engineering,
and NSF Grants CCR-0310960 and CCF-0634811.
A preliminary version of this paper appeared in STOC'06.}}

\tockeywords{extractor, disperser, clique, chromatic number, approximation, 
pseudorandom, explicit construction, NP-hard}

\begin{abstract}
We derandomize
results of H{\aa}stad (1999) and Feige and Kilian (1998)
and show that for all~\mbox{$\eps>0$}, approximating \clique\ and
\cnum\ to within $n^{1-\epsilon}$ are NP-hard.
We further derandomize results of Khot (FOCS '01)
and show that for some $\gamma>0$,
no quasi-polynomial time algorithm
approximates \clique\ or \cnum\ to within $n/2^{(\log n)^{1-\gamma}}$,
unless $\quasinp = \quasip$.

The key to these results is a new construction of dispersers, which are
related to randomness extractors.
A randomness extractor is an algorithm which extracts randomness from a
low-quality random source, using some additional truly random bits.
% Extractors have proved useful in a variety of seemingly unrelated areas.
We construct new extractors which require only $\log_2 n + O(1)$
additional random bits for sources with constant entropy rate, and have constant error.
Our dispersers use an arbitrarily small constant times $\log n$ additional random bits
for sources with constant entropy rate.
Our extractors and dispersers output $1-\alpha$ fraction of the randomness,
for any $\alpha>0$.

Our constructions rely on recent results in additive number theory and
extractors by Bourgain, Katz, and Tao (2004), 
Barak, Impagliazzo, and Wigderson (FOCS '04),
Barak et al.\ (STOC '05), 
and Raz (STOC '05).  We also simplify and
slightly strengthen key theorems in the second and third of these papers,
and strengthen a related theorem by Bourgain (2005).
\end{abstract}

\tocams{68Q17, 68R10, 65C10}

\tocacm{F.2, G.3, G.2}

\end{frontmatter}

\section{Introduction}
\label{sec:intro}

This work has two sources of motivation:  inapproximability and randomness
extractors.  We begin with inapproximability.

\subsection{Inapproximability}

\clique\ and \cnum\ are central optimization problems.  Their decision versions 
were in Karp's original list of NP-complete problems~\cite{Karp:reduce}.
The best approximation algorithms for these problems give approximation ratios
of the form $n/\plog (n)$~\cite{BopH,Hal}, 
which is not much better than the trivial approximation of~$n$.
Yet no strong inapproximability results were known until 
Feige et al.~\cite{fglss} discovered a connection between 
probabilistically checkable proofs (PCPs) and \clique.
The celebrated PCP Theorem of Arora et al.~\cite{almss} then implied that it is
NP-hard to approximate \clique\ to within $n^c$ for some
constant $c>0$.
This ratio was improved in~\cite{BelS,bgs} until H{\aa}stad, in a breakthrough,
showed a hardness ratio of $n^{1-\eps}$, for any $\eps>0$~\cite{Has:clique}.
The catch is that H{\aa}stad's reduction is randomized, 
so his theorem assumes that $\np \neq \zpp$.
Assuming only $\np \neq \p$, H{\aa}stad's hardness ratio becomes $n^{1/2 - \eps}$.
In this paper we derandomize H{\aa}stad's randomized reduction:

\begin{theorem}
\label{thm:clique}
For all $\eps>0$,
it is $\np$-hard to approximate {\clique} to within $n^{1 - \eps}$.
\end{theorem}

The inapproximability of \cnum\ has historically been even
harder to prove than \clique, because advances have typically occurred through
reductions from \clique.
Lund and Yannakakis were the first to show that it is NP-hard to
approximate \cnum\ to within $n^c$ for some constant $c>0$~\cite{ly}.
Other reductions ensued, culminating in
Feige and Kilian's proof of a hardness ratio of $n^{1-\eps}$~\cite{fk}. 
This uses H{\aa}stad's result, so it assumes that
$\np \neq \zpp$.
Assuming only $\np \neq \p$, the best previous hardness ratio explicitly
stated appears to be $n^{1/7 - \eps}$~\cite{bgs}.  Previous work
likely implied something better, though certainly no better than 
$n^{1/2 - \eps}$.
In this paper we derandomize Feige and Kilian's result:

\begin{theorem}
\label{thm:cnum}
For all $\eps>0$,
it is $\np$-hard to approximate \cnum\ to within $n^{1 - \eps}$.
\end{theorem}

Engebretsen and Holmerin~\cite{EngH} improved the hardness ratios 
for both problems
to $n^{1-o(1)}$ under the stronger assumption that $\np \not\subseteq \zpq$.
Khot~\cite{Khot:clique} later improved these $n^{1-o(1)}$ factors to
$n/2^{(\log n)^{1-\gamma}}$ for some constant $\gamma>0$, under the
same assumption.
We derandomize Khot's results and show $\quasinp$-hardness
% the quasi-polynomial analogue of $\np$-hardness, 
with respect to quasi-polynomial time reductions.
Because of the quasi-polynomial time reductions, $\quasinp$-hardness is weaker than
$\np$-hardness; see \expref{Subsection}{sec:quasinp} for more details.

\begin{theorem}
\label{thm:clique-quasi}
For some $\gamma>0$, it is $\quasinp$-hard to
approximate \clique\ to within $n/2^{(\log n)^{1-\gamma}}$.
\end{theorem}

\begin{theorem}
\label{thm:cnum-quasi}
\begin{sloppypar}
For some $\gamma>0$, it is $\quasinp$-hard to
approximate \cnum\ to within $n/2^{(\log n)^{1-\gamma}}$.
\end{sloppypar}
\end{theorem}

\smallskip
The key to our inapproximability results is constructing an appropriate 
disperser, which is related to a randomness extractor.
Good dispersers were known to help derandomize inapproximability results
for \clique\ (e.g.,~\cite{Zbpp,tz:code}), but it was not known for \cnum.
Before discussing dispersers, we discuss extractors.

\subsection{Randomness extractors}

Randomness extractors are motivated by the possibility of using
defective sources of randomness.
The model for defective random source involves lower bounding the
min-entropy:

\begin{definition}
The {\em min-entropy} of a distribution $X$ is $H_{\infty}(X) =
\min_x\{-\log_2 \Pr[X=x]\}$.
A {\em $k$-source} is a distribution with min-entropy at least $k$.
The {\em entropy rate} of a $k$-source on $\zon$ is $k/n$; we sometimes call
a $k$-source a rate-$k/n$-source.
\end{definition}

A randomness extractor is a function which extracts randomness from 
a $k$-source using a few additional uniformly random bits.

\begin{definition}[\cite{nz}]
Let $U_\ell$ denote the uniform distribution on $\ell$ bits.
A function $\ext: \set{0,1}^n \times \set{0,1}^d \rightarrow
\set{0,1}^m$ is a {\em $(k,\epsilon)$-extractor} if for every
$k$-source $X$, 
the distribution $\ext(X,U_d)$ is $\eps$-close in statistical (variation)
distance to $U_m$.
We say $\ext$ is a {\em strong} $(k,\epsilon)$-extractor if
the function $\ext(x,y)\circ y$ is a $(k,\eps)$-extractor,
where $\circ$ denotes concatenation. 
\end{definition}

Besides their straightforward applications to simulating
randomized algorithms using weak sources, extractors have had
applications to many areas in derandomization that are seemingly
unrelated to weak sources, including inapproximability~\cite{Zbpp,U:unapprox,MosU}.
Nisan and Ta-Shma~\cite{nt} survey these applications.
\iffalse
as well as
pseudorandom generators for space-bounded computation~\cite{nz}, 
expanders that beat the second eigenvalue bound~\cite{wz},
random sampling using few random bits~\cite{Zsamp}, 
cryptography~\cite{lu:crypto,v:local,cdhks,DodS},
error-correcting codes with strong list decoding properties~\cite{tz:code}, 
superconcentrators and non-blocking networks~\cite{wz}, 
sorting and selecting in rounds~\cite{Pip},
time versus space complexities~\cite{Sip},
leader election~\cite{Zsamp,rz},
another proof that ${\rm BPP} \subseteq {\rm PH}$~\cite{GolZ}, 
and implicit data structures~\cite{FiaN,Zthesis}.
\fi

Like many objects in the study of pseudorandomness, the existence of excellent extractors
is relatively easy to establish via the probabilistic method.
However, the explicit construction of efficient extractors has proved to be much more difficult.

We wish to construct extractors for any min-entropy $k$ with $d$,
the number of truly random bits, as small as possible and $m$, 
the number of output bits, as large as possible.
Different parameter settings are needed for different applications.
Constructing good extractors is highly non-trivial, because such
constructions beat the ``eigenvalue bound''~\cite{wz}.
Starting with the first extractor of Nisan and Zuckerman~\cite{nz},
a lot of effort has been expended constructing good extractors.
See Shaltiel's survey~\cite{Sha:survey} for more details.

In many applications, extractors are viewed as highly
unbalanced strong expanders. In this view an extractor is a
bipartite graph $G=(V,W,E)$ with $V=\set{0,1}^n$,
$W=\set{0,1}^m$, and $(x,z)$ is an edge iff there is some $y
\in \set{0,1}^d$ such that $\ext(x,y)=z$. Thus, the degree of each
vertex of $V$ is $D=2^d$, and the extractor hashes the input $x
\in V$ to a random neighbor among its $D$ neighbors in $W$.

Often this degree $D$ is of more interest than $d=\log D$. 
For example, 
in the samplers of~\cite{Zsamp} the degree is the number of samples;
in the extractor codes of~\cite{tz:code} $D$ is the length of the code;
in the simulation of BPP using weak sources~\cite{Zbpp} 
the degree is the number of calls to the BPP algorithm.
Most relevant for us, in the inapproximability of \clique~\cite{Zbpp} the
size of the graph is closely related to~$D$.

Before the work of Ta-Shma et al.~\cite{tzs},
all explicit extractors had degree $D$ at least some unspecified
polynomial in $n = \log |V|$. 
In contrast,
a non-explicit construction achieves
$D=O(n)=O(\log |V|)$, which matches the lower bound.
Ta-Shma et al.\ were able to achieve degree $D=O(n \log^* n)$
for $k \ge \sqrt{n}\log^2 n$, but
could only output about $k/\sqrt n$ bits.  In the case where
$k = \Omega(n)$, they could output $m = \Omega(k)$ bits, but then
they achieved degree $D=n \cdot \plog (n)$.
Our new construction achieves linear degree and linear output length for constant-rate sources.

\begin{theorem}
\label{thm:extract}
For all constant $\alpha,\delta,\eps > 0$ there is an
efficient family of strong $(k = \delta n, \eps)$-extractors
$\ext: \zon \times \zod \to \zom$ with $m \ge (1-\alpha)\delta n$
and $D = 2^d = O(n)$.
\end{theorem}

We now define the related notion of a disperser.
While dispersers are usually defined with respect to an error parameter $\eps$,
here it is more convenient to use the parameter $s=1-\eps$.

\begin{definition}
\label{def-disperser}
We may view a function
$\dis:[N] \times [D] \to [M]$ 
as a bipartite graph $([N],[M],E)$ where $(x,z) \in E$ iff
$\dis(x,y) = z$ for some $y \in [D]$.
For a set $X \subseteq [N]$,
let $\Gamma(X) = \set{\dis(x,y) \mid x \in X,y \in [D]}$ be the set of neighbors of $X$.
We say $\dis$ is a \emph{$(K,s)$-disperser} if, for any $X \subseteq [N]$
with $|X| \ge K$, $|\Gamma(X)| \geq s M$.
We say $\dis$ is a \emph{strong} $(K,s)$-disperser if
the function $\dis(x,y) \circ y$ is a $(K,s)$-disperser, where $\circ$ denotes concatenation.
\end{definition}

When $s$ is very small (so the error is close to 1), the probabilistic method can be used to
show that there exists
dispersers with degree even smaller than $n$,
namely $O(n/\log s^{-1})$.
In this paper, we succeed in matching this degree explicitly for constant-rate
sources.
These dispersers are the key for our inapproximability results.

\begin{theorem}
\label{thm:disperse}
For all constant $\alpha,\delta > 0$ and $s=s(n)>0$,
there is an efficient family
of strong $(K=N^\delta, s)$-dispersers $\dis:[N=2^n] \times [D] \to [M=2^m]$ 
such that $D = O(n/\log s^{-1})$ and $m  \ge (1-\alpha)\delta n$.
For subconstant $\delta=\delta(n)$,
the dependence is $D = (1/\delta)^{O(1)} n/\log s^{-1}$ and $m = \delta^{O(1)} n$.
\end{theorem}

\subsection{Techniques}

Our techniques are based on a combination of 
random walks on expanders and additive number theory.
Random walks on expanders have been used to amplify the success probability
of RP and BPP algorithms without using many additional random bits
\cite{aks:space,iz,CohW}.  This yields a disperser for sources with
entropy rate greater than 1/2~\cite{CohW}.  
By using Chernoff bounds for random walks on expanders~\cite{Gil,Kah:deviation,wx},
we can construct extractors in a similar way.
However, random walks provably fail
when the entropy rate drops below 1/2, so they were not considered
relevant for this case.

We handle entropy rates below 1/2
by first condensing the input until its entropy rate exceeds 1/2,
and then applying a random walk on an expander.
Condensers have been used before to build extractors~\cite{rsw,tuz}.
We condense using techniques developed from additive number theory.

Our basic condenser requires only one additional random bit, and is very simple.
Choose a prime~$p$ and form the line-point incidence graph over $\F_q$,
where~$q=2^p$.
This bipartite graph has as independent sets the lines and points in the plane $\F_q^2$,
with an edge between a point and a line if the point lies on the line.
View the input distribution as a distribution over the $q^3$~edges.
On input an edge, use the one random bit to output a random choice of its two endpoints.
If the input distribution has min-entropy rate~$\delta$, then roughly speaking one of the
two outputs will have min-entropy rate~$\delta' > \delta$.
The proof of correctness is simple, given the line-point incidence
theorem from Bourgain-Katz-Tao~\cite{bkt}.

This basic condenser improves that of Barak et al.~\cite{bkssw}, which requires two random bits.
The improvement is not necessary for our results, as we apply the basic condenser iteratively
to achieve entropy rate $.9$.
In fact, we use Raz's condenser~\cite{Raz:extract}, which is strong in the sense that with
high probability over the constant-bit seed, the min-entropy rate will increase.
We iteratively apply the Raz condenser
in a manner similar to~\cite{wz}, to improve the output length to $1-\alpha$
fraction of the input min-entropy, for any $\alpha>0$.

Although not needed for our other results,
we further simplify and slightly strengthen other applications of additive number theory.
These applications are based on the important theorem of
Bourgain-Katz-Tao~\cite{bkt} and extended by Bourgain-Glibichuk-Konyagin~\cite{bgk}:
in a field $\F_q$ where $q$ is either prime or $2^p$ for $p$~prime,
if $|A| \leq p^{.9}$, then $\max(|A+A|,|A \cdot A|) \geq |A|^{1+\alpha}$
for a global constant $\alpha>0$.
(See \expref{Section}{sec:sumprod} for more details, including the recent extension to non-prime fields.)
Barak-Impagliazzo-Wigderson~\cite{biw} used these ideas to show that for $\delta \leq .9$,
if $A$, $B$, and $C$ are independent rate-$\delta$-sources taking values in $\F_q$,
then $AB+C$ is close to a rate-$(1+\alpha')\delta$-source.
We show that $A$ and $C$ do not have to be
independent.  Instead, the lemma follows if $(A,C)$ is independent
from $B$.  Our overall proof is simpler than that in~\cite{biw}.
We further strengthen a theorem of Bourgain~\cite{Bou:more} and show
that the function $A(A+B)$ also gives a rate improvement.
% This strengthens the first theorem because the two sources are of equal length.
%Moreover, it says the first theorem holds when $C=A^2$ and the entropy rate is measured
%with respect to the length of $A$, rather than $(A,C)$, so only half the randomness is required.

This paper is organized as follows.  After some preliminaries in \expref{Section}{sec:prelim},
we give our basic disperser and extractor constructions in Sections~\ref{sec:disperser}
and~\ref{sec:extractor}, respectively.
We next show how to improve the output length of both constructions in \expref{Section}{avi-section}.
We then give the inapproximability of Max Clique and Chromatic Number in Sections~\ref{sec:clique}
and~\ref{sec:cnum}, respectively.
Finally, we improve the additive number theory applications in \expref{Section}{sec:numtheory}.


\section{Preliminaries}
\label{sec:prelim}

Some common notation we use is $\circ$ for concatenation and
$[n]$ for the set $\set{1,2,\ldots,n}$.
All logarithms are to the base 2.

When letters denote integers we often use a capital letter to denote 2 to the corresponding small letter, e.g., $K=2^k$.
When letters denote random variables we often use capital letters for random variables and corresponding
small letters for their instantiations.

For readability, we often assume various quantities are integers when
they are not necessarily.  It is not hard to see that this does not
affect our analysis.

We often use the term efficient to denote polynomial-time computable.

\subsection{Reductions and quasi-NP-hardness}
\label{sec:quasinp}

Our NP-hardness results are with respect to polynomial-time, many-one reductions.

Quasi-polynomial in $n$ means $2^{\plog(n)}$.  $\quasinp$ and $\quasip$
are the quasi-polynomial analogues of $\np$ and $\p$, respectively.
As usual with inapproximability results, we analyze the appropriate gap problem.

Note that no language is $\quasinp$-complete with respect to polynomial-time reductions.
For if there were such a language, it would be in $\dtime(2^{(\log n)^c})$ for some $c$;
but then $\quasinp \subseteq \dtime(2^{(\log n)^{c+1}})$, contradicting the time hierarchy theorem.

Therefore, we consider $\quasinp$-hardness with respect to
quasi-polynomial-time, many-one reductions.
Then any $\np$-hard language is also $\quasinp$-hard.
Moreover, if an $\quasinp$-hard language is in $\quasip$,
then $\quasinp = \quasip$.
Of course, $\quasinp = \quasip \iff \np \subseteq \quasip$.

\subsection{Distance between distributions}

For a probability distribution $X$, $X(s)$ denotes $\Pr[X=s]$.
For a set~$S$, $X(S)$ denotes $\Pr[X \in S]$.

\begin{definition}
Let $X_1$ and $X_2$ be two distributions on the same space $\Omega$.  The
statistical, or variation, distance between them is
\[
\|X_1-X_2\| = \max_{S \subseteq \Omega}|X_1(S) - X_2(S)|
= \frac{1}{2} \sum_{s \in \Omega}|X_1(s)-X_2(s)|\,.
\]
\end{definition}

We say $X_1$ and $X_2$ are $\eps$-close if $\|X_1-X_2\| \leq \eps$,
and are $\eps$-far otherwise.
We say a distribution on $\zon$ is $\eps$-uniform if it is $\eps$-close to $U_n$,
the uniform distribution on $n$~bits.

A useful method of computing the distance to the closest $k$-source is the following.

\begin{lemma}
\label{lem:close}
The distance of $X$ to the closest $\ell$-source is
$\sum_s \max(X(s) - 2^{-\ell},0)$.
\end{lemma}

Of course, only those $s$ with $X(s) > 2^{-\ell}$ contribute to the above sum.

\subsection{Flat sources}

\begin{definition}
A source is a probability distribution.
A flat source is a source which is uniform on its support.
The support of a distribution~$X$ is denoted~$\supp(X)$.
\end{definition}

The following lemma shows that it suffices to consider flat $k$-sources.

\begin{lemma}[\cite{cg:weak}]
Any $k$-source is a convex combination of flat $k$-sources.
\end{lemma}

\subsection{Dispersers}

Dispersers were defined in \expref{Definition}{def-disperser}.
There are two possible notions of efficiency:  one relative to the input size $\log N + \log D$
and the other relative to the graph size $N+M$.
For the inapproximability results, we only need the second, weaker, notion.

\begin{definition}
We say $\dis:[N] \times [D] \to [M]$ is {\em efficient} if it runs in polynomial time
in its input size $\log N + \log D$.
We say $\dis$ is {\em polynomial-time constructible} if the disperser graph is constructible
in polynomial time in the number of vertices $N+M$.
\end{definition}

Of course, efficient implies polynomial-time constructible.

The following simple lemma is useful when $D=O(1)$.

\begin{lemma}
\label{simple-strong}
A $(K,s)$-disperser is also a strong $(K,s/D)$-disperser.
\end{lemma}

We also use the following simple lemma.

\begin{lemma}
\label{lem:combine}
Given an efficient $(K, s')$-disperser 
$\dis_1:[N] \times [D_1] \to [N']$ 
and an efficient $(K'=s' N',s)$-disperser
$\dis_2:[N'] \times [D_2] \to [M]$,
we can build an efficient $(K,s)$-disperser
$\dis:[N] \times [D_1 D_2] \to [M]$.
If moreover $\dis_2$ is a strong $(K',s)$-disperser,
then $\dis$ is a strong $(K,s/D_1)$-disperser.
\end{lemma}

\begin{proof}
Take $\dis(x,y_1 \circ y_2) = \dis_2(\dis_1(x,y_1),y_2)$.
It is straightforward to verify that $\dis$ is a $(K,s)$-disperser.
To see the final statement of the lemma, suppose $\dis_2$ is strong.
Then $\dis(x,y_1 \circ y_2)\circ y_2$ is a $(K,s)$-disperser,
so $\dis(x,y_1 \circ y_2)\circ y_1 \circ y_2$ is a $(K,s/D_1)$-disperser.
\end{proof}

While we need the notion of strong disperser for the inapproximability of Chromatic Number,
the notion that suffices for this is captured in the following simple lemma.

\begin{lemma}
\label{strong-disperser}
Let
$\dis:[N] \times [D] \to [M]$ 
be a strong $(K,s)$-disperser.
For a set $X \subseteq [N]$,
let $\Gamma_y(X) = \set{\dis(x,y) \mid x \in X}$.
Then $\dis$ has the property that for any $X \subseteq [N]$
with $|X| \ge K$,
there is a $y$ such that $|\Gamma_y(X)| \geq s M$.
\end{lemma}

\subsection{Expander graphs}
\label{sec:expander}

Expander graphs are related to dispersers, and we use random walks on expanders to build
our dispersers.  We define expansion via eigenvalues.
Let $G$ be a connected regular undirected graph, and
let $A$ be the transition matrix of a random walk on $G$.
(If $M$ is the adjacency matrix and $d$ the degree, then $A=M/d$.)
We call $G$ a $\lambda$-expander if all eigenvalues of~$A$ other than 1 are
at most~$\lambda$ in absolute value.
Smaller $\lambda$ mean better expansion.
We will need $2^c$-regular $2^{-\gamma c}$-expanders on $2^m$ nodes,
for a constant~$\gamma>0$.

Extending earlier constructions which required large primes~\cite{lps, Mar:Ramanujan},
Morgenstern~\cite{Mor:Ramanujan} gave explicit constructions which achieve this
with $\gamma$ approaching $1/2$.
However, because the number of vertices is not~$2^m$ and there are restrictions on the degree,
it is easier to use expanders by
Gabber and Galil~\cite{GabG}.  They gave an explicit construction of 8-regular
$\lambda$-expanders on $2^m$ nodes, for even $m$,
where $\lambda=5\sqrt{2}/8<1$.
(See the survey~\cite{hlw} for the statement in this form, and for many other aspects about expanders.)
The neighbors of a vertex may be computed with a constant
number of arithmetic operations.
By taking the $(c/3)$th power of the graph, we get a $2^c$-regular $\lambda^{c/3}$-expander,
as we need (though this requires that~$3|c$).

\subsection{Somewhere-random sources}

The concept of somewhere-random sources will be useful in constructing 
dispersers.

\begin{definition}
An \emph{elementary somewhere-$k$-source} is a vector of sources 
$(X_1,\ldots,X_\ell)$, such that some $X_i$ is a $k$-source.
A {\em somewhere-$k$-source} is a convex combination of 
elementary somewhere-$k$-sources.
\end{definition}

Note that there may be arbitrary dependencies among the $X_i$.
Further note that in a somewhere-$k$-source which is not elementary,
all $X_i$ may have low min-entropy.

\subsection{Condensers}

Condensers and somewhere condensers will be essential in our extractor 
and disperser constructions, respectively.

\begin{definition}
A function $C:\zon \times \zod \to \zom$ is a 
\emph{$(k \to \ell,\eps)$-condenser}
if for every $k$-source $X$, $C(X,U_d)$ is $\eps$-close to 
some $\ell$-source.
When convenient, we call $C$ a rate-$(k/n \to \ell/m,\eps)$-condenser.
The condenser is strong if the average over $y \in \zod$ of the 
minimum distance of $C(X,y)$
to some $\ell$-source is at most $\eps$.
\end{definition}

\begin{definition}
A function $C:\zon \times \zod \to \zom$ is a 
\emph{$(k \to \ell,\eps)$-somewhere-condenser}
if for every $k$-source $X$, the vector $\angles{C(X,y)}_{y \in \zod}$ 
is $\eps$-close to a somewhere $\ell$-source.
When convenient, we call $C$ a rate-$(k/n \to \ell/m,\eps)$-somewhere-condenser.
\end{definition}

Note that a $(k \to \ell,\eps)$-strong-condenser is
a $(k \to \ell,\eps)$-somewhere-condenser.
We will also need the following.

\begin{lemma}
\label{condense-to-disperse}
If $C:\zon \times \zod \to \zom$ is a 
$(k \to \ell,\eps)$-somewhere-condenser, then it is a 
$(2^k,(1-\eps)2^{\ell-m})$-disperser.
\end{lemma}

\begin{proof}
This follows because a distribution which is $\eps$-close to an $\ell$-source
must have a support of size at least $(1-\eps)2^\ell$.
\end{proof}

When composing condensers, we will need the following type of lemma.

\begin{lemma}
\label{lem:compose}
Suppose $Z_1$ is $\eps_1$-close to an $\ell_1$-source, and for all $z_1 \in \supp(Z_1)$,
the distribution $(Z_2 \mid Z_1=z_1)$ is $\eps_2$-close to an $\ell_2$-source.
Then $Z_1 \circ Z_2$ is $\eps_1 + \eps_2$-close to an $\ell_1 + \ell_2$-source.
\end{lemma}

\begin{proof}
Let $W_1$ be an arbitrary $\ell_1$-source which is $\eps_1$-close to~$Z_1$.
For~$w_1 \in \supp(Z_1) \cap \supp(W_1)$, define
the distribution of $(W_2 \mid W_1=w_1)$ to be an arbitrary $\ell_2$-source which is $\eps_2$-close
to~$(Z_2 \mid Z_1=w_1)$.
For~$w_1 \in \supp(W_1) \setminus \supp(Z_1)$, define
the distribution of $(W_2\mid W_1=w_1)$ to be the uniform distribution.
Then $W_1 \circ W_2$ is an $\ell_1+\ell_2$-source, which is
$\eps_1 + \eps_2$-close to $Z_1 \circ Z_2$,
\end{proof}

We build extractors by first condensing and then applying a weaker extractor.
The idea of condensing before extracting was used in~\cite{rsw,tuz}, and a simple lemma from~\cite{tuz} shows that this works.

\begin{lemma}[\cite{tuz}]
\label{lem:tuz}
Suppose that $C:\zon \times \zo^{d_1} \to \zo^{n'}$ is an efficient (strong) $(k \to \ell,\eps_1)$-condenser,
and $\ext:\zo^{n'} \times \zo^{d_2} \to \zom$ is an efficient (strong) $(\ell,\eps_2)$-extractor.
Then
\[
\ext'(x,y_1 \circ y_2) = \ext( C(x,y_1),y_2)
\]
is an efficient (strong) $(k,\eps_1 + \eps_2)$-extractor.
\end{lemma}
%% ToC edit: I added the word "that" in the first sentence of the lemma above to make the spacing work out.

\subsection{Sum-product theorem}
\label{sec:sumprod}

The following sum-product theorem underlies our condensers.

\begin{theorem}[\cite{bkt,bgk}]
There is a constant $\alpha>0$ such that for any field $F=\F_q$ where $q$ is either
prime or $2^p$ for $p$ prime, the following holds.
For any non-empty $A \subset F$, $|A|<q^{.9}$,
\[ \max(|A+A|,|A \cdot A|) = \Omega(|A|^{1+\alpha})\,. \]
\end{theorem}

Here the $.9$ can be increased to any constant less than 1, but the constant $\alpha$ will
likely decrease.
Note that for $q=2^p$, when $A=\zo$ then $A+A=A\cdot A = A$;
however, the $\Omega$ handles this problematic case.
We use results based on earlier versions of this theorem, when the full bounds were not known
to hold for fields of size $2^p$.
Although the result quoted above isn't apparent for such fields in the credited papers,
it follows from Corollary 2.56 of~\cite{tv-book}, which credits those papers.
For the best constant $\alpha$ as of this writing see~\cite{KatS}.
For a self-contained exposition of the prime case, see~\cite{Gre:sumprod}.

\section{Disperser construction}
\label{sec:disperser}

We first use random walks on expanders to construct low-degree dispersers for
high min-entropy.  This construction could work for any min-entropy rate bigger than
$1/2$, but to output almost all the randomness we need rate close to 1.

\begin{proposition}
\label{prop:rw}
For any $\alpha > 0$, there is a $\beta, c_0 > 0$ such that for any
$c=c(n) \ge c_0$, there is an efficient family
of strong $(K=N^{1-\beta}, 2^{-c})$-dispersers $\dis:[N=2^n] \times [D] \to [M=2^m]$ 
such that $D \le \alpha n/c$ and $m \ge (1-\alpha)n$.
(Let $\gamma>0$ be the constant from \expref{Section}{sec:expander}.
We can take $c_0 = 2/\gamma$ and
$\beta = \alpha \gamma/5$.)
\end{proposition}

\begin{proof}
We use the disperser of~\cite{aks:space}.
Set $s=2^{-c}$, and $m=(1-\alpha)n$.
Let $G$ be a $2^c$-regular $2^{-\gamma c}$-expander on $[2^m]$
(see \expref{Section}{sec:expander}).
To find the neighbors of a vertex $u \in [2^n]$, use the $n$ bits defining $u$
to choose a random vertex $v_0 \in [2^m]$ and then take a random walk 
$v_1,\ldots,v_D$ on $G$.  Connect $u$ to $v_1,\ldots,v_D$.
We ignore $v_0$ so that we cleanly get
$n = m + Dc$, and $D=(n-m)/c = \alpha n/c$.

First consider when the bits describing the random walk are uniformly random.
In this case we can use the tight analysis given by Kahale~\cite{Kah}.
For $S \subseteq [2^m]$ and $s = |S|/2^m$,
Kahale showed that
\[ \Pr[(\forall i) v_i \in S] \le s(s + (1-s) \lambda)^{D-1} < 
(s + \lambda)^D\,. \]
Since $s = 2^{-c}<2^{-\gamma c}$, this probability is less than
$2^{(1-\gamma c)D} \le 2^{-(\gamma/2)cD} \le 2^{-(\gamma/2)\alpha n}$.

When the bits describing the random walk are chosen from a source with min-entropy $(1-\beta)n$,
each string which before had probability $2^{-n}$ now has probability at most $2^{\beta n} \cdot 2^{-n}$.
Therefore, the error probability grows by at most $2^{\beta n}$, and hence is at most
$2^{(\beta-\gamma \alpha/2)n}$.
Therefore, this is a $(K=N^{1-\beta}, s)$-disperser for any $\beta < \gamma \alpha /2$.

We still need to show that this disperser is strong.
To do this, we must consider the situation where
instead of one $S$ we now have $D$ such~$S_i$, $|S_i| = s_i 2^m$,
where the average of the~$s_i$ is at most~$s$.
By the result of Kahale given as Theorem~A.5 in~\cite{Gol:sampler-survey},
for a uniformly random walk
\begin{align*}
\Pr[(\forall i) v_i \in S_i] &\leq \sqrt{s_1 s_D} \prod_{i=2}^D \sqrt{s_i + (1-s_i)\lambda^2} \leq \sqrt{ \prod_{i=1}^D (\lambda^2 + (1-\lambda^2)s_i)}\\
&\leq \left( \frac{1}{D}\sum_{i=1}^D (\lambda^2 + (1-\lambda^2)s_i)\right)^{D/2}
\leq (\lambda^2 + s)^{D/2}\,.
\end{align*}
The third inequality follows from the arithmetic-geometric mean.
This bound $(s + \lambda^2)^{D/2}$ is at most the square root of Kahale's earlier bound, so it's
at most $2^{-(\gamma \alpha/4)n}$.  By choosing $\beta < \gamma \alpha/4$ the proposition
follows.
\end{proof}

To give a construction for all positive entropy rates, we use the
following theorem, which follows from the condenser in~\cite{bkssw}
or~\cite{Raz:extract}.  While~\cite{bkssw} only gives an ordinary disperser,
by \expref{Lemma}{simple-strong} it is also a strong disperser for essentially
the same parameters, since D is constant.

\begin{theorem}[\cite{bkssw,Raz:extract}]
\label{thm:bkssw}
For any $\beta,\delta>0$, there is an efficient family of
rate-$(\delta \to 1-\beta,\eps=2^{-\Omega(n)})$-somewhere-condensers 
$C:[N=2^n] \times [D] \to [M=2^m]$
where $D=O(1)$ and $m=\Omega(n)$.
For subconstant $\delta=\delta(n)$
the dependence is $D=(1/\delta)^{O(1)}$ and $m=\delta^{O(1)} n$.
\end{theorem}

\begin{remark}
In the original paper, the construction for subconstant $\delta$ required a large prime.
However, there is no longer a need for this, given the new sum-product theorem for
fields of size $2^p$ (see \expref{Subsection}{sec:sumprod}).
\end{remark}

Applying Lemmas~\ref{condense-to-disperse} and~\ref{simple-strong}, 
we deduce

\begin{corollary}
\label{cor:bkssw}
For any $\beta,\delta>0$, there is an efficient family of
strong $(K = N^\delta, M^{-\beta})$-dispersers 
$\dis:[N=2^n] \times [D] \to [M=2^m]$
where $D=O(1)$ and $m=\Omega(n)$.
\end{corollary}

We can now give our disperser construction, although for now we obtain
output length a small constant fraction of $\delta n$, rather than almost all
of it.

\begin{theorem}
\label{thm:disperse-weaker}
For any $\delta > 0$ and $s=s(n)>0$,
there is an efficient family
of strong $(K=N^\delta, s)$-dispersers $\dis:[N=2^n] \times [D] \to [M=2^m]$ 
such that $D = O(n/\log s^{-1})$ and $m  = \Omega(n)$.
For subconstant $\delta=\delta(n)$ the dependence is
$D = (1/\delta)^{O(1)} n/\log s^{-1}$ and $m = \delta^{O(1)} n$.
\end{theorem}

\begin{proof}
% Assume without loss of generality that $\delta \le .9$ and $s \le .03$,
% so $.8 + 1/(2\lg s^{-1}) < .9$.
Let
$\dis_1:[N=2^n] \times [D_1=O(1)] \to [N'=2^{n'}]$ be
an efficient strong $(K=N^\delta, (N')^{-.1})$-disperser from
\expref{Corollary}{cor:bkssw}, with $n' = \Omega(n)$.
Let
$\dis_2:[N'] \times [D_2 \le  n'/\lg s^{-1}] \to [M=2^m]$,
be an efficient strong $(K'= (N')^{.9},s)$-disperser
given by \expref{Proposition}{prop:rw}, with $m = n'/2$.
Applying \expref{Lemma}{lem:combine} yields the desired disperser.
\end{proof}

To improve the output length to $(1-\alpha)\delta n$, we need to use better condensers,
and we defer the proof to the next section.

\section{Extractor construction}
\label{sec:extractor}

Readers interested solely in the inapproximability
results can skip directly to \expref{Section}{sec:clique}, as the current dispersers suffice to prove those
results.

Our extractor construction is essentially the same as our disperser construction.
We first show how to extract when the entropy rate is close to 1, by
using random walks on expanders.
Then we use Raz's recent condenser~\cite{Raz:extract} to reduce to 
the high-entropy case.

\begin{proposition}
\label{rw-extractor}
For all $\alpha, \eps > 0$, there exists $\beta > 0$ such that there is an
efficient family of $(k = (1-\beta) n, \eps)$-extractors
$\ext: \zon \times \zod \to \zom$ with $m \ge (1-\alpha)n$
and $D = 2^d \le \alpha n$.
\end{proposition}

\begin{proof}
Set $m=(1-\alpha)n$ and $c=3$ (say).
Let $G$ be a $2^c$-regular $\lambda$-expander on $[2^m]$
with $\lambda$ bounded away from~1
(see \expref{Section}{sec:expander}).
To find the neighbors of a vertex $u \in [2^n]$, use the $n$ bits defining $u$
to choose a random vertex $v_0 \in [2^m]$ and then take a random walk 
$v_1,\ldots,v_D$ on $G$.  Connect $u$ to $v_1,\ldots,v_D$.
As before,
$n = m + Dc$, and $D=(n-m)/c = \alpha n/c$.

Let $S \subseteq [2^m]$ have density $\mu = |S|/2^m$.
First consider when the bits describing the random walk are chosen uniformly, and
let the random variable $\hat{\mu}$ denote the fraction
of $v_i$ which are in $S$.
Gillman~\cite{Gil}
(see also Kahale~\cite{Kah:deviation}) proved a Chernoff bound for random walks on expanders.
We use the improved constants obtained by Healy~\cite{Hea}:
\[ \Pr[ | \hat{\mu} - \mu | \ge \eps ] \le 2 \exp( -(1-\lambda) \eps^2 D/4)\,. \]
(Dinwoodie~\cite{Din} essentially improved the constant 4 above to 2, but only states it from
a worst-case vertex so there is another term.)

For large enough $n$ (to get rid of the multiplicative 2),
this error is at most $2^{-an}$ for $a = (1-\lambda)\eps^2 \alpha/(4c)$.
When the bits describing the random walk are chosen from a source with min-entropy $(1-\beta)n$,
the error probability grows by at most $2^{\beta n}$.
Thus this is a $(k= (1-\beta) n, \eps)$-extractor for $\beta < a$.
\end{proof}

We can make these extractors strong by using a better Chernoff bound.

\begin{proposition}
\label{rw-strong-extractor}
For all $\alpha, \eps > 0$, there exists $\beta > 0$ such that there is an
efficient family of strong-$(k = (1-\beta) n, \eps)$-extractors
$\ext: \zon \times \zod \to \zom$ with $m \ge (1-\alpha)n$
and $D = 2^d \le \alpha n$.
\end{proposition}

\begin{proof}
We use the same construction.  For the proof, we must now show near uniformity
over $[D] \times [2^m]$.  We therefore consider
$S \subseteq [D] \times [2^m]$, so $S = \cup_i \set{i} \times S_i$.
Again consider when the bits describing the random walk are chosen uniformly, and
now let the random variable $\hat{\mu}$ denote the fraction
of $v_i$ which are in $S_i$.
Wigderson and Xiao~\cite{wx}
improved Gillman's theorem above for this case.
We again use Healy's improved constants~\cite{Hea}):
\[ \Pr[ | \hat{\mu} - \mu | \ge \eps ] \le 2\exp( -(1-\lambda) \eps^2 D/4)\,. \]
We can then conclude with the same argument as above.
\end{proof}

For dispersers, we combined the high-entropy construction with somewhere-condensers
from \expref{Theorem}{thm:bkssw}.  For extractors, we need to use the improved strong
condenser due to Raz~\cite{Raz:extract}.

\begin{theorem}[\cite{Raz:extract}]
\label{thm:Raz-condense}
For any constants $\beta,\delta, \eps > 0$, there is a constant $d$ such that 
there is an efficient
% family of 
rate-$(\delta \to (1-\beta),\eps)$-strong
condenser $C:\zon \times \zod \to \zom$
such that $m=\Omega(n)$.
\end{theorem}

Applying \expref{Lemma}{lem:tuz} to Raz's condenser and the extractor above, we obtain the desired
theorem,
except that the output length is linear instead of the $(1-\alpha)$-fraction we claimed.

\begin{theorem}
For all $\delta,\eps > 0$ there is an
efficient family of strong-$(k = \delta n, \eps)$-extractors
$\ext: \zon \times \zod \to \zom$ with $m = \Omega(n)$
and $D = 2^d = O(n)$.
\end{theorem}

\section{Improving the output length}
\label{avi-section}

The results in this section were obtained jointly with Avi Wigderson.

We now would like to obtain output length $(1-\alpha)k$, for an arbitrary $\alpha>0$, while maintaining
the linear degree.
The initial idea is to do a construction similar to that by Wigderson and the author~\cite{wz}:
if the output length is significantly less than $k$, use an independent seed to extract more bits from the same input.
We can't do this directly, because even two runs of the extractor gives degree $\Theta(n^2)$,
which is too expensive.  Yet we can achieve this with the condenser, which uses only a
constant number of random bits.  Thus, our intermediate goal, which is interesting in its own right, is:

\begin{theorem}
\label{all-condense}
For any constants $\alpha, \beta, \delta, \eps > 0$, there is a constant $d$ such that 
there is an efficient
% family of 
rate-$(\delta \to (1-\beta),\eps)$-strong
condenser $C:\zon \times \zod \to \zom$
such that $m \ge (1-\alpha) \delta n$.
\end{theorem}

Yet this theorem cannot be achieved by applying the above idea to \expref{Theorem}{thm:Raz-condense}.
The reason is that the error cannot be controlled.  If the output length is $\gamma n$, we would
like to iterate about $1/\gamma$ times, but we cannot do this if the initial error is bigger than $\gamma$.
In \expref{Theorem}{thm:Raz-condense}, as well as \expref{Theorem}{thm:bkssw}, the output length may depend on the error.
Hence we construct an improved condenser, which follows from the improved
merger of Dvir and Raz~\cite{DviR}.  In this merger, the output length doesn't depend on the error.

\begin{lemma}
\label{lem:small-error}
For any $\delta > 0$, there exists $\gamma > 0$, such that for any $\eps > 0$,
there is a constant $d$ such that
there is an efficient
% family of 
rate-$(\delta \to (1-\delta),\eps)$-strong
condenser $C:\zon \times \zod \to \zom$
such that $m \geq \gamma n$.
\end{lemma}

\begin{proof}
Fix $\delta > 0$.  By \expref{Theorem}{thm:bkssw}, there is an efficient
rate-$(\delta \to 1-\delta/2, \eps_1=2^{-{\Omega(n)}})$ somewhere-condenser
$C_1:\zon \times \zo^{d_1} \to \zo^{m_1}$, where $d_1=O(1)$ and $m_1 = \Omega(n)$.
By the main theorem of~\cite{DviR}, there is a ``strong merger''
$M:(\zo^{m_1})^{2^{d_1}} \times \zo^{d} \to \zom$ with $d=f(d_1)=O(1)$ and $m=\Omega(m_1)$
such that whenever the input $X_1$ on $(\zo^{m_1})^{2^{d_1}}$ is a somewhere rate $(1-\delta/2)$-source,
then the average over $y \in \zod$ of the distance of $M(X_1,y)$ to
the closest rate $(1-\delta)$-source is at most $\eps/2$.
The length $m$ may be chosen independently of $\eps$, although $d$ depends on $\eps$.
Hence the required strong condenser is $C(x,y) = M(\angles{C_1(x,y_1}_{y_1 \in \zo^{d_1}},y)$.
\end{proof}

The following lemma is the condenser analogue to the corresponding extractor lemma in~\cite{wz}.

\begin{lemma}
\label{condense2}
Suppose that $C_1:\zon \times \zo^{d_1} \to \zo^{m_1}$ is a strong $(k \to \ell_1,\eps_1)$-condenser and
$C_2:\zon \times \zo^{d_2} \to \zo^{m_2}$ is a strong $(k-m_1-s \to \ell_2,\eps_2)$-condenser.
Then $C:\zon \times \zo^{d_1 + d_2} \to \zo^{m_1 + m_2}$, given by
\[
C(x,y_1 \circ y_2) = C_1(x,y_1) \circ C_2(x,y_2)\,,
\]
 is a strong
$(k \to \ell_1 + \ell_2,\eps_1 + \eps_2 + 2^{-s})$-condenser.
\end{lemma}
%% ToC edit: I added the word "that" in the first sentence of the lemma above to avoid spacing difficulties.

\begin{proof}
Let $X$ be a $k$-source.
For $y \in \zo^{d_i}$, let $\eps_i^y$ denote the minimum distance of $C_i(X,y)$ to some $\ell_i$-source.
Fix $y_1 \in \zo^{d_1}$.
Let $S$ denote the set of low-probability elements in the output:
\[
S = \set{z \mid \Pr_X [C_1(X,y_1) = z] \le 2^{-(m_1 + s)}}\,.
\]
Then $\Pr [C_1(X,y_1) \in S] \le |S| 2^{-(m_1+s)} \le 2^{-s}$.
For $z \not\in S$, $X$ conditioned on $C_1(X, y_1)=z$ is a $(k-m_1-s)$-source.
Hence, under such conditioning,
for each $y_2 \in \zo^{d_2}$, $C_2(X,y_2)$ is within $\eps_2^{y_2}$ of some $\ell_2$-source.
Putting this together as in \expref{Lemma}{lem:compose},
$C(X,y_1 \circ y_2)$ is within $\eps_1^{y_1} + 2^{-s} + \eps_2^{y_2}$ of some
$\ell_1 + \ell_2$-source.
Since the average of $\eps_i^{y_i}$ is at most $\eps_i$, this completes the proof of the lemma.
\end{proof}
Applying this lemma inductively, we can show:

\begin{lemma}
\label{iterate-condense}
Suppose $C:\zon \times \zo^{d} \to \zo^{m}$ is an efficient strong $(k \to \ell,\eps)$-condenser.
Then for any positive integers $s,t$, we can construct $C':\zon \times \zo^{td} \to \zo^{tm}$,
an efficient strong $(k+(t-1)m+s \to t\ell,t\eps + (t-1) 2^{-s})$-condenser.
\end{lemma}

\begin{proof}
We prove this by induction on $t$.  For the base case $t=1$ we can take $C'=C$.
Now assume the lemma for a given $t$.  Set $C_1$ to be the condenser given by the lemma for $t$,
and set $C_2=C$.  Applying \expref{Lemma}{condense2} gives the condenser for $t+1$.
\end{proof}

We can now prove \expref{Theorem}{all-condense}.
We would like to condense additional entropy, as long as there is $\alpha \delta n$ entropy
left in the source.
We also want the output entropy rate to be $1-\beta$, and if in each iteration we have this
entropy rate, then overall we do as well.  These two goals mean we should use a condenser
converting rate $\alpha \delta$ to rate $1-\beta$.
This condenser has some output length $\gamma n$, so we need to iterate $1/\gamma$ times.
This determines the error we need, which is why it is crucial we can pick the error after
knowing~$\gamma$.

\begin{proof}[Proof of \expref{Theorem}{all-condense}]
Let $\alpha, \beta, \delta, \eps > 0$ be given.
By \expref{Lemma}{lem:small-error}, for some $\gamma>0$ there is an efficient strong
rate-$(\alpha\delta \to (1- \beta),\eps')$-condenser $C:\zon \times \zod \to \zom$,
where $\eps'$ will be chosen later and $m \ge \gamma n$.
Set $t=(1-\alpha)\delta/\gamma$ and apply \expref{Lemma}{iterate-condense} with an $s$ to
be chosen later.
This gives an efficient strong $(\delta n - \gamma n + s \to (1-\beta)(tm), i \eps'+2^{-s})$-condenser
$C':\zon \times \zo^{id} \to \zo^{tm}$.
Choosing $s=\gamma n$ and $\eps'$ small enough so $t \eps'+2^{-s} \le \eps$ gives the theorem.
\end{proof}
\renewcommand{\proofname}{Proof}

\begin{sloppypar}
Combining our condenser from \expref{Theorem}{all-condense} with our extractor from \expref{Proposition}{rw-extractor} via \expref{Lemma}{lem:tuz}, we obtain our main extractor construction:
\end{sloppypar}

\smallskip \noindent
{\bf \expref{Theorem}{thm:extract}.}
For all constant $\alpha,\delta,\eps > 0$ there is an
efficient family of strong $(k = \delta n, \eps)$-extractors
$\ext: \zon \times \zod \to \zom$ with $m \ge (1-\alpha)\delta n$
and $D = 2^d = O(n)$.

\smallskip
By combining the same condenser with the earlier disperser of \expref{Proposition}{prop:rw},
we obtain our main disperser construction:

\smallskip \noindent
{\bf \expref{Theorem}{thm:disperse}.}
For all constant $\alpha,\delta > 0$ and $s=s(n)>0$,
there is an efficient family
of strong $(K=N^\delta, s)$-dispersers $\dis:[N=2^n] \times [D] \to [M=2^m]$ 
such that $D = O(n/\log s^{-1})$ and $m  \ge (1-\alpha)\delta n$.
For subconstant $\delta=\delta(n)$,
the dependence is $D = (1/\delta)^{O(1)} n/\log s^{-1}$ and $m = \delta^{O(1)} n$.

\section{Max Clique}
\label{sec:clique}

In this section, we show how our dispersers yield inapproximability
results for \clique.  We assume some familiarity with PCPs.
Since the inapproximability of \clique\ follows from the proof of the
inapproximability of \cnum,
readers not familiar with PCPs may prefer to read the next section, which doesn't use them.

Historically, Feige et al.~\cite{fglss} were the first to show how to obtain inapproximability
results for \clique\ using PCPs.  Bellare, Goldreich, and Sudan~\cite{bgs} showed that free bit complexity
is the parameter of a PCP which gives the best inapproximability results.

\begin{definition}
$\fpcp_s(r,f)$ is the class of promise problems recognized by PCP verifiers
using $r$ random bits and $f$ free bits, achieving perfect completeness
and soundness $s$.
\end{definition}

\begin{theorem}[\cite{bgs,fglss}]
\label{thm:bgs}
If $\np \subseteq \fpcp_s(r,f)$, then it is NP-hard to distinguish whether
a graph on $2^{r+f}$ vertices has clique number
at least $2^r$ or at most $s2^r$.
\end{theorem}

H{\aa}stad~\cite{Has:clique} showed how to reduce the soundness by paying only a 
tiny amount in the free bit complexity.  Specifically, he showed:

\begin{theorem}[\cite{Has:clique}]
\label{hastad}
For any $\bar{f}>0$, there is an $\ell$ such that
$\np \subseteq \fpcp_{2^{-\ell}}(O(\log n),\bar{f}\ell)$.
\end{theorem}

The quantity $\bar{f}$ is called the \emph{amortized free bit complexity}, and can be less than 1
(H{\aa}stad's result shows it can be any positive constant).
%%% ToC edit: Author check above: I added a "d" to amortize, and made this italic.

The following follows from \expref{Theorem}{thm:bgs} and
the amplification of a PCP via a good disperser, as first suggested
in~\cite{Zbpp}.

\begin{lemma}
\label{lem:disperser-clique}
Suppose $\np \subseteq \fpcp_s(r,f)$ and there is a polynomial-time
constructible $(K,s)$-disperser $\dis:[2^R] \times [D] \to [2^r]$.
Then $\np \subseteq \fpcp_{K/2^R} (R,Df)$, and
hence it is $\np$-hard to distinguish whether a graph on $2^{R+Df}$ vertices has
clique number at least $2^R$
or clique number at most $K$.
\end{lemma}

This suffices to prove our theorem.

\smallskip \noindent
{\bf \expref{Theorem}{thm:clique}}.
It is $\np$-hard to approximate {\clique} to within 
$n^{1 - \eps}$ for any $\eps > 0$.

\begin{proof}
Equivalently, we will show a factor of $n^{1-2\eps}$.
Fix $\eps > 0$.
\expref{Theorem}{thm:disperse} says that for any $s=s(n)$
there is an efficient family of
$(K=N^\eps,s)$-dispersers of degree $D \leq c(\log N)/\log s^{-1}$,
for some $c=c(\eps)$.
Let $\bar{f} \leq \eps/c$, and apply \expref{Theorem}{hastad} to get an
$\ell$ and $r=r(n)=O(\log n)$ such that 
$\np \subseteq \fpcp_{2^{-\ell}}(r,\bar{f}\ell)$.
Now let $s=2^{-\ell}$, so there is an efficient
$(K=(2^R)^{\eps},2^{-\ell})$-disperser 
$\dis:[2^R] \times [D] \to [2^r]$.
Apply \expref{Lemma}{lem:disperser-clique} with this disperser, and note that
$Df \leq (cR/\ell) \cdot  (\ell \bar{f}) = \bar{f} \cdot cR \leq \eps R$.
Hence it is NP-hard to distinguish clique number at least $2^R$ from clique number at most $2^{\eps R}$
in graphs on $2^{(1+\eps)R}$ vertices.
Moreover, since the output length is linear in the input length, $R=O(\log n)$,
so the reduction is polynomial time.
\end{proof}

To obtain inapproximability up to an $n^{1-o(1)}$ factor, 
we can use the following theorem by H{\aa}stad and Khot~\cite{HasK},
which is basically the same as that obtained by Samorodnitsky and Trevisan
\cite{SamT} but gives perfect completeness.

\begin{theorem}[\cite{HasK}]
\label{hk}
For any $\ell=\ell(n)$ which is one less than a perfect square,
\[
\np \subseteq \fpcp_{2^{-\ell}}(O(\ell \log n),2\sqrt{\ell+1})\,.
\]
\end{theorem}

We can now prove:

\smallskip \noindent
{\bf \expref{Theorem}{thm:clique-quasi}}.
For some $\gamma>0$, it is $\quasinp$-hard to
approximate \clique\ to within $n/2^{(\log n)^{1-\gamma}}$.

\begin{proof}
Set $\eps =\eps(n) = 1/\log n$.
By \expref{Theorem}{thm:disperse-weaker} (or the stronger \expref{Theorem}{thm:disperse}),
there is a $c$ such that for any $s=s(n)$
there is a polynomial-time constructible family of
$(K=N^\eps,s)$-dispersers of degree $D \leq (\log n)^c (\log N)/\log s^{-1}$.
Let $\ell = 9 (\log n)^{2(c+1)}$ and $s=2^{-\ell}$.
Apply \expref{Theorem}{hk} to get
$r=r(n)=\plog (n)$ such that 
$\np \subseteq \fpcp_s (r,3\sqrt{\ell})$.
We'll use the polynomial-time constructible
$(K=(2^R)^{\eps},2^{-\ell})$-disperser 
$\dis:[2^R] \times [D] \to [2^r]$.
Apply \expref{Lemma}{lem:disperser-clique} with disperser $\dis$, and note that
% $R=\plog (n)$ and
$Df \leq (R (\log n)^c /\ell) \cdot  (3\sqrt{\ell}) = 
R/\log n = \eps R$.
Hence it is NP-hard to distinguishing clique number at least $2^R$ from
clique number at most $2^{R/\log n}$ in graphs on $2^{(1+1/\log n)R}$ vertices.
Since $R=\plog (n)$, the theorem follows.
\end{proof}

\section{Chromatic Number}
\label{sec:cnum}

Now we show how our dispersers imply the NP-hardness of
approximating {\sc Chromatic Number} to within $n^{1-\eps}$ for
any $\eps>0$.
We derandomize
Feige's and Kilian's proof~\cite{fk} of the same inapproximability ratio
but under the stronger assumption that $\np$ is not in~$\zpp$.
As in their proof, we work with the fractional chromatic number~$\chi_f$,
which up to logarithmic factors is the same as the chromatic number~$\chi$
\cite{Lov:fractional}.
They also make use of the independence number $\alpha$.
Just as $\alpha(G)\chi(G) \ge |V(G)|$, so too is
$\alpha(G)\chi_f(G) \ge |V(G)|$ (here $V(G)$ denotes the vertices of $G$).

Feige and Kilian start with a graph $G$ (from a family of graphs) which has 
a constant hardness ratio:  either $G$ has chromatic number at
least~$c$ or at most $c'<c$.
They actually need $c' = c^\gamma$, where $\gamma>0$ is arbitrary,
as well as a corresponding bound on the independence number $\alpha$.

\begin{theorem}[\cite{fk}]
\label{fk}
For all $\gamma>0$, there is an $s>0$, such that there is
a polynomial-time reduction from an NP-complete 
language $L$ to chromatic number with the following properties.
On input $x$, the algorithm outputs a graph $G=(V,E)$ such that
\begin{enumerate}
\item
If $x \in L$ then $\chi_f(G) \le s^{-\gamma}$;
\item
If $x \not\in L$ then $\alpha(G) < s|V|$, and hence $\chi_f(G) > 1/s$.
\end{enumerate}
\end{theorem}

(The parameter $s$ is not exactly the soundness of the PCP; rather, it is
the soundness times $2^{-f}$, where $f$ is the free bit complexity.
Also, Feige and Kilian don't state this as a theorem, but it can be deduced
from their Lemma~2 and the parameters achieved in their Section~5.6.
They state their parameters as:
for any $\gamma, \ell >0$, they can set $s = O(2^{-\ell})$ and
if $x \in L$ then $\chi_f(G) \le 2^{3\gamma \ell+1}$.
This is equivalent to our statement above, for a slightly different choice
of $\gamma$.)

Feige and Kilian next amplify the hardness ratio using randomized graph
products.  
That is, they take a suitably-sized random 
subgraph $G'$ of the product graph $G^D$ 
which has hardness ratio $|V(G')|^{1-\eps}$.
$G^D$ is defined with respect to the following ``OR'' graph product.

\begin{definition}
% For a graph $G$, $V(G)$ denotes its vertices and $E(G)$ its edges.
For graphs $G=(V,E)$ and $H=(W,F)$, define the graph 
$G \times H$ as having vertex set $V \times W$, and edges
$\set{(v,w),(v',w')}$ where $\set{v,v'} \in E$ or $\set{w,w'} \in F$.
\end{definition}

Note that
$(v_1,\ldots,v_D)$ is adjacent to $(w_1,\ldots,w_D)$ in 
$G^D$ if any $(v_i,w_i)$ is an edge in $G$.
It is straightforward to show that $\alpha(G \times H) = \alpha(G) \cdot
\alpha(H)$.  Using the definition of $\chi_f$ as a linear program and
linear programming duality, Feige showed that
$\chi_f(G \times H) = \chi_f(G) \cdot \chi_f(H)$~\cite{f:theta}.

We derandomize the randomized graph powering.  This was done earlier
in the clique setting~\cite{afwz}, but the results there are not tight
enough.  On the other hand, for cliques, two types of bounds
are needed -- one if the clique number is large, and one if it's small.  
For chromatic number, one of the two cases becomes easy.
If $\chi_f(G)$ is small,
it will suffice to use the trivial bound 
$\chi_f(G') \le \chi_f(G^D) = \chi_f(G)^D$.

We can define a derandomized graph powering of $G=(V,E)$ with respect to 
any disperser $\dis: X \times [D] \to V$ as follows.
Define $\dis(x) = (\dis(x,1),\dis(x,2),\ldots,\dis(x,D))$ and
$\dis(X) = \set{\dis(x) \mid x \in X}$.
Now define the graph $\dis(G^D)$ to be the induced subgraph of $G^D$
on vertex set $\dis(X)$.

\begin{lemma}
\label{lem:chromatic}
Given a graph $G=(V,E)$ and a disperser $\dis$ with degree $D$, 
let $G' = (V',E') = \dis(G^D)$.  Then
\begin{enumerate}
\item
$\chi_f(G') \le (\chi_f(G))^D$.
\item
If $\alpha(G) < s|V|$ and $\dis$ is a strong $(K,s)$-disperser,
then $\alpha(G') < K$, and hence $\chi_f(G') > |V'|/K$.
\end{enumerate}
\end{lemma}

\begin{proof}
The first part follows because $\chi_f(G') \le \chi_f(G^D) = (\chi_f(G))^D$.
For the second part, suppose $\alpha(G') \ge K$, and let $X$ be an
independent set in $G'$ of size $K$.
Note that $\Gamma_i(X)$, as defined in \expref{Lemma}{strong-disperser},
corresponds to the set of $i$th coordinates of $X$.
By the strong disperser property,
for some $i \in [D]$, $|\Gamma_i(X)| \geq s |V| > \alpha(G)$.
Hence $\Gamma_i(X)$ is not an independent set in $G$, so it contains an edge,
say $\set{v_i,w_i}$.
If $v_i$ is the $i$th coordinate of $v$, and $w_i$ is the $i$th coordinate of $w$,
then because we are using the OR graph product, $\set{v,w}$ is an edge in $G'$.
Since $v,w \in X$, this contradicts our assumption that $X$ was an independent
set.
\end{proof}

We are now ready to prove our theorem.

\smallskip \noindent
{\bf \expref{Theorem}{thm:cnum}}.
It is $\np$-hard to approximate \cnum\ to within 
$n^{1 - \eps}$ for any $\eps > 0$.

\begin{proof}
Fix $\eps>0$.
\expref{Theorem}{thm:disperse} says that for any $s=s(n)$
there is an efficient family of
strong $(K=N^\eps,s)$-dispersers of degree $D \leq cn/\log s^{-1}$,
for some $c=c(\eps)$.
Set $\gamma=\eps/c$, and use the 
Feige-Kilian reduction, which comes with an $s=s(\gamma)$.
Using this $s$,
apply \expref{Lemma}{lem:chromatic} using an efficient strong 
$(K=N^\eps,s)$-disperser.
In polynomial time we construct a graph $G'$ on $N$ vertices
such that if $x \in L$,
\[ \chi_f(G') \le s^{-\gamma D} \leq 2^{\gamma cn}=N^\eps\,.\]
If $x \not \in L$, then $\alpha(G') \le N^\eps$, so $\chi_f(G') \ge N^{1-\eps}$.
Thus it is NP-hard to distinguish graphs with fractional chromatic number $N^\eps$
from graphs with fractional chromatic number $N^{1-\eps}$.
Converting to chromatic number loses
only a logarithmic factor, so the theorem follows.
\end{proof}

To derandomize Khot's results, we use his reduction in place of \expref{Theorem}{fk}:

\begin{theorem}[\cite{Khot:clique}]
For any $\beta>0$, there is a quasi-polynomial-time reduction from an NP-complete 
language $L$ to \cnum\ with the following properties.
On input $x$ of size $n$, the algorithm outputs a graph $G=(V,E)$ such that
\begin{enumerate}
\item
$|V| \le 2^{(\log n)^{1+3\beta}}$;
\item
If $x \in L$ then $\chi_f(G) \le 2^{(\log n)^\beta}$;
\item
If $x \not\in L$ then $\alpha(G) < 2^{-(\log n)^{2\beta}} |V|$.
\end{enumerate}
\end{theorem}

We can now show:

\smallskip
\begin{sloppypar}
 \noindent
{\bf \expref{Theorem}{thm:cnum-quasi}}.
For some $\gamma>0$, it is $\quasinp$-hard to
approximate \cnum\ to within $n/2^{(\log n)^{1-\gamma}}$.
\end{sloppypar}

\begin{proof}
We use the polynomial-time constructible
$(N^\delta, s)$-strong disperser from \expref{Theorem}{thm:disperse},
with $s=2^{-(\log n)^{2\beta}}$ and $\delta$ to be chosen shortly.
This has degree $D \le (\log N)/(\delta^c (\log n)^{2\beta})$.
Set $\delta = (\log n)^{-\beta/2c}$.
Applying \expref{Lemma}{lem:chromatic}, it is $\quasinp$-hard to distinguish between
graphs on $N$ vertices with chromatic number $N^\delta$ from those with
chromatic number $2^{(\log n)^\beta D} \le N^{(\log n)^{-\beta/2}}$.
\end{proof}

\section{Simplifying and strengthening additive number theory applications}
\label{sec:numtheory}

We now give our simple one-bit condenser and improve other lemmas
from~\cite{biw,bkssw,Bou:more}.
We first define incidences of lines and points.

\begin{definition}
% A point and line are \emph{incident} if the point lies on the line.
For $P$ a set of points and $L$ a set of lines,
$I(P,L)$ denotes the number of \emph{incidences}, i.e., the number of
ordered pairs $(p,\ell)$ where the point $p$ lies on the line $\ell$.
\end{definition}

We rely heavily on the following theorem on point-line incidences.
Bourgain, Katz, and Tao~\cite{bkt} showed how this theorem follows from the sum-product theorem
(see \expref{Section}{sec:sumprod}).
The constant 1.9 below can be increased to any constant less than 2, 
but the constant $\alpha$ will likely decrease.

\begin{theorem}[{Incidence Theorem} \cite{bkt,bgk}]
\label{incidences}
Let $F = \F_q$, where $q$ is either prime or $2^p$ for $p$ prime.
Let $P$, $L$ be sets of points and lines
in $F^2$ of cardinality at most $M \le p^{1.9}$.
Then there exists an $\alpha>0$ such that
the number of incidences
\[I(P,L) = O(M^{3/2-\alpha})\,. \]
\end{theorem}

\subsection{Condensing with one random bit}
\label{sec:bkssw}

Barak et al.~\cite{bkssw} consider a condenser which uses two extra bits of
randomness; here we show that one bit of randomness suffices.
Of course, one bit is necessary, so this is optimal.
Our proof is also simpler, proceeding directly from the \expref{Incidence Theorem}{incidences}.
There is nothing special about the constant .9 below; any constant 
less than 1 will do.

Our condenser is simple to describe.
We work over a field $F=\F_q$, where $q=2^p$ for $p$ prime.
Define the point-line incidence graph as the bipartite graph $G=(V,W,E)$ with vertices
$V=F^2$ the set of points, and $W$ the set of lines over $F$, and $(p,\ell)$ is an edge
iff $p$ and $\ell$ are incident.
Our condenser is based on the function $h: E \rightarrow V \times W$
which maps an edge to its two endpoints.
An equivalent view of $h$ is the map from $F^3$ to $(F^2)^2$
which maps $(a,b,c)$ to $((b,ab+c),(a,c))$.
This is because the point $(b,ab+c)$ lies on the line $y=ax+c$.

Our condenser $C:F^3 \times \zo \rightarrow F^2$ is simply $C(e,i) = h(e)_i$.
The two-bit condenser of Barak, et al.\ is very similar:
their corresponding $h$ maps $(a,b,c)$ to the length 4 vector $(a,b,c,ab+c)$.

\begin{theorem}
\label{simple-bkssw}
Suppose $\delta \leq .9$ and $q^\delta = \omega(1)$.
The function~$C$ above
is a rate-$(\delta \to (1+\alpha/2)\delta,\eps)$-somewhere-condenser,
where $\eps = q^{-\alpha \delta/20}$.
Here $\alpha$ is the constant from the \expref{Incidence Theorem}{incidences}.
\end{theorem}

Before we proceed, it is convenient to introduce a modified notion of
somewhere-random source, which we call somewhere light.

\begin{definition}
A vector of sources $X=(X_1,\ldots,X_\ell)$ is {\em $\eps$-close
to somewhere-$k$-light}
if the probability, when $(x_1,\ldots,x_\ell)$ is output according to $X$, that no $x_i$ are light
is at most $\eps$.  We say $x_i$ is light if $\Pr[X_i=x_i]  \leq 2^{-k}$.
\end{definition}

The following lemma describes the relationship between this notion and that of somewhere-random.

\begin{lemma}
\label{modsense}
Assume $\ell 2^{-t} < 1-\eps$.
If $X=(X_1,\ldots,X_\ell)$ is $\eps$-close to somewhere-$k$-light,
then $X$ is $((\ell - 1) 2^{-t} + \eps)$-close to a somewhere-$(k-t)$-source.
\end{lemma}

\begin{proof}
Partition the support of $X$ into $\ell+1$ bins so that bin $i$ contains vectors $(x_1,\ldots,x_\ell)$
where $x_i$ is light (break ties arbitrarily), and bin 0 contains vectors with no light coordinates.
The probability of a bin is the sum of the probabilities of vectors in the bin.
By assumption, bin 0 has probability at most $\eps$.
Let $\bin(x)$ denote the bin of $x$.
Consider any bin $i \neq 0$ with probability at least $2^{-t}$
(since $\ell 2^{-t} < 1-\eps$ there is at least one such bin).
For any $(x_1,\ldots,x_\ell)$ in bin~$i$,
\[ \Pr[X_i = x_i \mid \bin(X) = i] \leq \Pr[X_i = x_i]/\Pr[ \bin(X) = i] \leq 2^t \cdot 2^{-k}\,. \]
Hence, if we let $Y^i$ denote the distribution of $X$ conditional on $\bin(X)=i$, we get that
$Y^i_i$ has min-entropy at least $k-t$, and hence $Y^i$ is a somewhere~$(k-t)$-source.
For any bin $i$ with probability less than $2^{-t}$, and for $i=0$, let $Y^i$ be the uniform distribution.
Define the distribution $Y=\sum_i \Pr[\bin(X)=i] Y^i$.
Then $Y$ is a somewhere~$(k-t)$-source and the distance of $X$ to $Y$ comes only from bin 0 and
bins with low probability, and is at most $\eps + (\ell - 1) 2^{-t}$.
\end{proof}

We now work with the modified notion.
The main idea is to convert the statistical problem to a counting problem,
which we do via the following lemma.

\begin{lemma}
\label{lem:2sets}
If $(X,Y)$ is not $\eps$-close to a somewhere-$k$-source, then 
there exists sets $S \subseteq \supp(X)$,$T \subseteq \supp(Y)$,
$|S|,|T| < 2^{k+1}/\eps$, such that
\[ \Pr[X \in S \wedge Y \in T] > \eps/2\,. \]
\end{lemma}

\begin{proof}
Let $r=k+\lg(2/\eps)$.
By \expref{Lemma}{modsense}, $(X,Y)$ is not $\eps/2$-close to somewhere-$r$-light.
Setting $S = \set{s \mid X(s) > 2^{-r}}$
and $T = \set{t \mid Y(t) > 2^{-r}}$ yields the lemma.
\end{proof}

We can now prove the theorem.

\begin{proof}[Proof of \expref{Theorem}{simple-bkssw}]
Instead of $C$, we analyze the equivalent function~$h$.
We may assume that the input to $h$
is uniform on a set of edges of size $K=2^k = q^{3\delta}$, and
set $k'=(1+\alpha/2)(2k/3)$.
Suppose the output $(X,Y)$ of $h$ is not $\eps$-close to a somewhere-$k'$-source.
Let $P=S$ and $L=T$ be the sets of size less than $K_0=2^{k'+1}/\eps$
given by \expref{Lemma}{lem:2sets}.
Assuming without loss of generality that $\alpha \leq .1$,
note that $K_0 \leq q^{2\delta}{1+\alpha/2} \leq q^{1.8 \cdot 1.05} < q^{1.9}$.

We calculate the number of incidences $I(P,L)$ in two ways.  On the one hand,
since each edge is an incident point-line pair,
and at least $\eps/2$ fraction of these pairs lie in $P \times L$,
the number of incidences
$I(P,L) \ge \eps K/2$.
On the other hand, by the \expref{Incidence Theorem}{incidences},
\[
I(P,L) = O( K_0^{3/2 - \alpha}) = O(K^{(1+\alpha/2)(3/2 - \alpha)2/3}/\eps^2)
= O(K^{1 - \alpha/6}/\eps^2)\,.
\]
Combining these,
we get a contradiction for $\eps = K^{-\alpha/20}$, and the theorem is proved.
\end{proof}
\renewcommand{\proofname}{Proof}

\subsection{AB+C theorem from two sources}
\label{sec:biw}

In this section and the next, we consider a scenario where we have several
independent weak sources, but no truly random seed.
The sum-product theorem implies that if $A$, $B$, and $C$ are sets of the same size~$K$,
then the set $AB+C$ is noticably bigger than $K$.
Barak et al.~\cite{biw} showed the significantly stronger statistical statement:
if $A$, $B$, and $C$ are independent distributions with min-entropy~$k$ each, 
then the entropy rate of $AB+C$ is noticably larger than~$k$.

Here we show how to improve the entropy rate with just two sources, by allowing $A$ and $C$ to be
correlated.  Our proof is also simpler than that in~\cite{biw}.
Again, there is nothing special about the constant .9 below; any constant less
than 1 will do.

\begin{theorem}
\label{simple-biw}
Suppose $\delta \leq .9$ and $q^\delta = \omega(1)$.
If $(A,C)$ and $B$ are output from independent rate-$\delta$-sources,
where $A,B,C$ are elements of a field $F=\F_q$, where $q$ is prime
or $2^p$ where $p$ is prime.
Then $AB+C$ is $q^{-\alpha \delta /2}$-close to a 
rate-$(1 + \alpha)\delta$-source,
where $\alpha$ is the constant from the \expref{Incidence Theorem}{incidences}.
\end{theorem}

We prove this using the \expref{Incidence Theorem}{incidences}.
The relevance of lines comes in viewing $(a,c)$ as the line $y=ax+c$.
In order to get a suitable set of points,
we use the following simple lemma.
This lemma is key in deducing a statistical theorem, which is about distributions,
from the \expref{Incidence Theorem}{incidences}, which just bounds set sizes.

\begin{lemma}
\label{lem:sets}
Suppose $X$ is $\eps$-far from a $k$-source.
Then $\exists S \subseteq \supp(X)$, $|S|< 2^k$, such that
$\Pr[X \in S] \ge \eps$.
\end{lemma}

\begin{proof}
Take $S = \set{s \mid X(s) > 2^{-k}}$, so $|S|<2^k$.  \expref{Lemma}{lem:close} implies that the
distance of $X$ to the closest $k$-source is $\sum_{s \in S} (X(s) - 2^{-k}) \leq \Pr[X \in S]$.
\end{proof}

We can now prove the theorem by taking the set of points to be $B \times S$.

\begin{proof}[Proof of \expref{Theorem}{simple-biw}]
Let $(A,C)$ be output from a flat $2k$-source,
and $B$ from an independent flat $k$-source.
Suppose $AB+C$ is $\eps$-far from a $k'$ source, where $k' = (1+\alpha)k$.
Let $S$ be the set of size less than $K' = 2^{k'}$ given 
by \expref{Lemma}{lem:sets}.
Define the set of lines $L$ to be the support of $(A,C)$, where
$(a,c)$ is associated with the line $ax+c$.
Let $P$ be the set of points $\supp(B) \times S$.

We calculate the number of incidences in two different ways.
On the one hand, 
note that when the line $(a,c)$ applied to $b$ lands in $S$, it corresponds
to an incidence.
Since $\Pr[AB+C \in S] \ge \eps$, and since the distributions are flat,
\[ I(P,L) \ge \eps |L| \cdot |\supp(B)| = \eps K^3\,, \]
where $K=2^k \leq |F|^{.9}$.
On the other hand, since $|L| = K^2 \leq |F|^{1.8}$ and
$|P| \le K \cdot K' = K^{2+\alpha} \leq |F|^{1.9}$,
by the \expref{Incidence Theorem}{incidences}
\[ I(P,L) = O(K^{(2+\alpha)(3/2 - \alpha)}) = o(K^{3-\alpha/2})\,.\]
Hence we may take $\eps = K^{-\alpha/2}$ and the theorem follows.
\end{proof}


\subsection{Rate-improving function for two equal-length sources}

Note that the previous theorem improves the rate from two independent sources,
where one has twice the length of the other.
In this subsection, we do this from two sources of equal length,
by giving a statistical version of a theorem by Bourgain.
Bourgain~\cite{Bou:more} showed that for a prime $q$, the function
$g:\F_q \times \F_q \to \F_q$ given by $g(x,y) = x(x+y)$ has the following ``expanding'' property.
For $|A|\ge |B| \geq q^\delta$, $\delta<1$,
$g(A,B) \geq q^{\delta + \beta}$ for some $\beta=\beta(\delta) > 0$.\footnote{Bourgain's proof
also uses the \expref{Incidence Theorem}{incidences}, but it was done independently of our use of the \expref{Incidence Theorem}{incidences} in Subsections~\ref{sec:bkssw} and~\ref{sec:biw}.}
With the new sum-product theorem holding also for~$q=2^p$, $p$~prime, Bourgain's theorem will
also hold in this case.

We show the statistical analogue of this theorem.
Equivalently, our theorem says that the $AB+C$ theorem holds when $C=A^2$,
and furthermore the entropy rate is measured with respect to the length of $A$, rather than $(A,C)$.

\begin{theorem}
Suppose $\delta \leq .9$ and $q^\delta = \omega(1)$.
If $X,Y$ are output
from independent rate-$\delta$-sources on $F=\F_q$,
then $g(X,Y)$ is $q^{-\alpha \delta /4}$-close to 
a rate-$(1+\alpha/2)\delta$-source.
Here $\alpha$ is the constant from the \expref{Incidence Theorem}{incidences}.
\end{theorem}

\begin{proof}
We follow Bourgain's proof, but some care is required to make it statistical.
Let $X$ and $Y$ be independent random variables
uniformly distributed over sets $A$ and $B$ of size $q^\delta$.
Assume without loss of generality that they don't contain 0.
Suppose $g(X,Y)$ is not $\eps$-close to a $\delta + \beta$-source, where we will choose
$\beta$ and $\eps$ later.  
Let 
\[
S=\set{z \in F \mid \Pr[g(X,Y)=z] > q^{-(\delta+\beta)}}\,,
\]
\ie, $S$ is the set of size less than $q^{\delta+\beta}$ 
guaranteed by \expref{Lemma}{lem:sets}, such that $\Pr[g(X,Y) \in S] \geq \eps$.

The difficulty in proving the theorem is that directly, this probability being at least $\eps$
does not give many lines (in $y$), so we cannot apply the
\expref{Incidence Theorem}{incidences}.  We follow Bourgain and find many more lines by
exploiting the linearity in $y$.

To this end, we begin with a collision probability lower bound.
Let 
\[
T=\set{y \mid \Pr[g(X,y) \in S] \ge \eps/2}\,.
\]
Then $|T| \ge \frac{\eps}{2}|B| \ge \frac{\eps}{2}q^\delta$.
Fix $y' \in T$.  Let $Z$ be distributed as $X$, but independent of
$X$ and $Y$.  Then
\[ \Pr_{X,Y,Z}[g(X,Y) = g(Z,y')] 
> \Pr_{Z}[g(Z,y') \in S] q^{-(\delta+\beta)}
\ge \frac{\eps}{2}q^{-(\delta+\beta)}\,.
\]

Let $X_1$ also be distributed as $X$, but independent of all previously
defined random variables.
We now show that a function in both $X$, $X_1$, and $Y$, which is linear in~$Y$, still
has significant probability of being in~$S$.  This will give us many more lines.

\begin{eqnarray*}
\Pr_{X,X_1,Z,Y}[ X(X+\frac{X_1}{Z} (X_1 + Y) - Z) \in S]
&\ge& \sum_{y' \in T} \Pr[X(X+y') \in S] \Pr[\frac{X_1}{Z} (X_1 + Y) - Z = y']\\
&=& \sum_{y' \in T} \Pr[X(X+y') \in S] \Pr[X_1 (X_1 + Y) = Z(Z+y')]\\
&>& |T| \frac{\eps}{2} \cdot \frac{\eps}{2}q^{-(\delta+\beta)}
> \frac{\eps^3}{8}q^{-\beta}\,.
\end{eqnarray*}

Therefore, there is a fixed $z$ such that
\begin{equation}
\label{prob-bourgain}
\Pr_{X,X_1,Y}[ X(X+\frac{X_1}{z} (X_1 + Y) - z) \in S] > \frac{\eps^3}{8}q^{-\beta}\,.
\end{equation}

This says there are many lines (linear in $y$) which, 
when applied to many values of $y$, land in $S$.  This will contradict
the \expref{Incidence Theorem}{incidences}.  In particular, 
let $\ell_{x,x_1}(y)$ denote the line
\[
\frac{x x_1}{z} y + \bigl(x^2 + \frac{x x_1^2}{z} - zx\bigr)\,,
\]
and let $L$ denote
the set of all such lines as $x,x_1$ range over $A$.

Of course, $|L| \le |A|^2 = q^{2\delta}$.
We also show $|L| \ge q^{2\delta}/3$ 
by observing that,
for fixed $z,w$,
there are at most 3 nonzero solutions in $x,x_1$ to
\begin{eqnarray*}
z &=& \frac{x x_1}{z} \\
w &=& x^2 + \frac{x x_1^2}{z} -zx\,.
\end{eqnarray*}

We define the points $P = B \times S$, so $|P| < q^{2\delta + \beta}$.
By Equation~\eqref{prob-bourgain} and the fact that the pairs $(x,x_1)$
overcount lines by at most a factor of 3, 
the number of incidences is at least
$\frac{\eps^3}{24}q^{3\delta-\beta}$.
By the \expref{Incidence Theorem}{incidences}, the number of incidences is
$O(q^{(2\delta + \beta)(3/2 - \alpha)})$.
Comparing these gives the theorem,
with $\beta=\alpha \delta/2$ and $\eps=q^{-\alpha \delta/4}$.

\end{proof}

\section*{Acknowledgements}

I am grateful to Avi Wigderson for our joint work on \expref{Section}{avi-section}, as well
as for other useful discussions.  I also thank
Ronen Shaltiel, Anup Rao, Mike Saks, 
Salil Vadhan, Madhu Sudan, and Jesse Kamp for helpful discussions
and comments.
Finally, I thank the anonymous referees for careful readings and very helpful comments.

\bibliographystyle{tocplain}
\bibliography{a006}

\begin{tocauthors}
\begin{tocinfo}[david]
   David Zuckerman \tocabout{}\\
Department of Computer Science\\
University of Texas at Austin\\
1 University Station C0500\\
Austin, TX  78712\\
   diz\tocat{}cs\tocdot{}utexas\tocdot{}edu\\
   \url{http://www.cs.utexas.edu/~diz}
\end{tocinfo}
\end{tocauthors}

\begin{tocaboutauthors}
   \begin{tocabout}[david]
{\sc David Zuckerman} received his \phd\ from U.C.\ Berkeley
in 1991 under the supervision of Umesh Vazirani.  Since 1994 he has
been on the faculty of the University of Texas at Austin.
His research interests lie primarily in the role of randomness in computation, 
particularly pseudorandomness and its relationship to other topics in complexity theory,
coding theory, and cryptography.
His hobbies include dancing and Scrabble.
   \end{tocabout}
\end{tocaboutauthors}

\end{document}
